Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ notable_bug_fix1

Deprecations
~~~~~~~~~~~~
-
- Deprecated allowing non-``bool`` values for ``na`` in :meth:`.str.contains`, :meth:`.str.startswith`, and :meth:`.str.endswith` for dtypes that do not already disallow these (:issue:`59615`)
-

.. ---------------------------------------------------------------------------
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
TYPE_CHECKING,
Union,
)
import warnings

import numpy as np

Expand All @@ -19,6 +20,7 @@
pa_version_under10p1,
pa_version_under13p0,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
is_scalar,
Expand Down Expand Up @@ -295,6 +297,14 @@ def _str_contains(
result = pc.match_substring(self._pa_array, pat, ignore_case=not case)
result = self._result_converter(result, na=na)
if not isna(na):
if not isinstance(na, bool):
# GH#59561
warnings.warn(
"Allowing a non-bool 'na' in obj.str.contains is deprecated "
"and will raise in a future version.",
FutureWarning,
stacklevel=find_stack_level(),
)
result[isna(result)] = bool(na)
return result

Expand Down
26 changes: 26 additions & 0 deletions pandas/core/strings/object_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@
cast,
)
import unicodedata
import warnings

import numpy as np

from pandas._libs import lib
import pandas._libs.missing as libmissing
import pandas._libs.ops as libops
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.missing import isna

Expand Down Expand Up @@ -142,14 +144,38 @@ def _str_contains(
else:
upper_pat = pat.upper()
f = lambda x: upper_pat in x.upper()
if not isna(na) and not isinstance(na, bool):
# GH#59561
warnings.warn(
"Allowing a non-bool 'na' in obj.str.contains is deprecated "
"and will raise in a future version.",
FutureWarning,
stacklevel=find_stack_level(),
)
return self._str_map(f, na, dtype=np.dtype("bool"))

def _str_startswith(self, pat, na=None):
    """
    Test whether each element starts with ``pat``.

    The element-wise check is delegated to ``self._str_map``, with ``na``
    forwarded as ``na_value`` and a bool result dtype requested.

    A ``na`` that is neither missing (per ``isna``) nor a ``bool`` emits a
    ``FutureWarning`` before the mapping is performed.
    """
    if not (isna(na) or isinstance(na, bool)):
        # GH#59561
        message = (
            "Allowing a non-bool 'na' in obj.str.startswith is deprecated "
            "and will raise in a future version."
        )
        warnings.warn(message, FutureWarning, stacklevel=find_stack_level())

    def has_prefix(x):
        return x.startswith(pat)

    return self._str_map(has_prefix, na_value=na, dtype=np.dtype(bool))

def _str_endswith(self, pat, na=None):
    """
    Test whether each element ends with ``pat``.

    The element-wise check is delegated to ``self._str_map``, with ``na``
    forwarded as ``na_value`` and a bool result dtype requested.

    A ``na`` that is neither missing (per ``isna``) nor a ``bool`` emits a
    ``FutureWarning`` before the mapping is performed.
    """
    if not (isna(na) or isinstance(na, bool)):
        # GH#59561
        message = (
            "Allowing a non-bool 'na' in obj.str.endswith is deprecated "
            "and will raise in a future version."
        )
        warnings.warn(message, FutureWarning, stacklevel=find_stack_level())

    def has_suffix(x):
        return x.endswith(pat)

    return self._str_map(has_suffix, na_value=na, dtype=np.dtype(bool))

def _str_replace(
Expand Down
46 changes: 44 additions & 2 deletions pandas/tests/strings/test_find_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,16 @@ def test_contains_na_kwarg_for_nullable_string_dtype(
# https://github.com/pandas-dev/pandas/pull/41025#issuecomment-824062416

values = Series(["a", "b", "c", "a", np.nan], dtype=nullable_string_dtype)
result = values.str.contains("a", na=na, regex=regex)

msg = (
"Allowing a non-bool 'na' in obj.str.contains is deprecated and "
"will raise in a future version"
)
warn = None
if not pd.isna(na) and not isinstance(na, bool):
warn = FutureWarning
with tm.assert_produces_warning(warn, match=msg):
result = values.str.contains("a", na=na, regex=regex)
expected = Series([True, False, False, True, expected], dtype="boolean")
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -232,7 +241,12 @@ def test_contains_nan(any_string_dtype):
expected = Series([True, True, True], dtype=expected_dtype)
tm.assert_series_equal(result, expected)

result = s.str.contains("foo", na="foo")
msg = (
"Allowing a non-bool 'na' in obj.str.contains is deprecated and "
"will raise in a future version"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.str.contains("foo", na="foo")
if any_string_dtype == "object":
expected = Series(["foo", "foo", "foo"], dtype=np.object_)
elif any_string_dtype.na_value is np.nan:
Expand All @@ -254,6 +268,34 @@ def test_contains_nan(any_string_dtype):
# --------------------------------------------------------------------------------------


def test_startswith_endswith_validate_na(any_string_dtype):
    """
    Check how ``str.startswith`` / ``str.endswith`` react to a non-bool ``na``.

    Object dtype and python-storage ``StringDtype`` are expected to emit a
    ``FutureWarning``; every other dtype supplied by the fixture is expected
    to raise ``pyarrow.lib.ArrowInvalid``.
    """
    # GH#59615
    ser = Series(
        ["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"],
        dtype=any_string_dtype,
    )

    dtype = ser.dtype
    python_string = isinstance(dtype, pd.StringDtype) and dtype.storage == "python"
    warns_on_bad_na = python_string or dtype == np.dtype("object")

    if not warns_on_bad_na:
        # TODO: don't surface pyarrow errors
        import pyarrow as pa

        msg = "Could not convert 'baz' with type str: tried to convert to boolean"
        for method_name in ("startswith", "endswith"):
            with pytest.raises(pa.lib.ArrowInvalid, match=msg):
                getattr(ser.str, method_name)("kapow", na="baz")
        return

    # (accessor method, pattern, expected deprecation message)
    deprecation_cases = (
        (
            "startswith",
            "kapow",
            "Allowing a non-bool 'na' in obj.str.startswith is deprecated",
        ),
        (
            "endswith",
            "bar",
            "Allowing a non-bool 'na' in obj.str.endswith is deprecated",
        ),
    )
    for method_name, pat, msg in deprecation_cases:
        with tm.assert_produces_warning(FutureWarning, match=msg):
            getattr(ser.str, method_name)(pat, na="baz")


@pytest.mark.parametrize("pat", ["foo", ("foo", "baz")])
@pytest.mark.parametrize("dtype", ["object", "category"])
@pytest.mark.parametrize("null_value", [None, np.nan, pd.NA])
Expand Down