diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index bc1d7cb52e196..87dd48b3569d8 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -554,3 +554,16 @@ def value_counts(self, dropna: bool = True): return Series( result._values.to_numpy(), index=result.index, name=result.name, copy=False ) + + def _reduce( + self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs + ): + if name in ["any", "all"]: + arr = pc.and_kleene( + pc.invert(pc.is_null(self._pa_array)), pc.not_equal(self._pa_array, "") + ) + return ArrowExtensionArray(arr)._reduce( + name, skipna=skipna, keepdims=keepdims, **kwargs + ) + else: + return super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs) diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py index a6532a6190467..43b2df4290eed 100644 --- a/pandas/tests/extension/base/reduce.py +++ b/pandas/tests/extension/base/reduce.py @@ -25,7 +25,7 @@ def check_reduce(self, s, op_name, skipna): try: alt = s.astype("float64") - except TypeError: + except (TypeError, ValueError): # e.g. Interval can't cast, so let's cast to object and do # the reduction pointwise alt = s.astype(object) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 069d53aeb248f..5176289994033 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -157,6 +157,12 @@ def test_fillna_no_op_returns_copy(self, data): class TestReduce(base.BaseReduceTests): + def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: + return ( + ser.dtype.storage == "pyarrow_numpy" # type: ignore[union-attr] + and op_name in ("any", "all") + ) + @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): op_name = all_numeric_reductions diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 87892a81cef3d..021252500e814 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -1078,6 +1078,25 @@ def test_any_all_datetimelike(self): assert df.any().all() assert not df.all().any() + def test_any_all_pyarrow_string(self): + # GH#54591 + pytest.importorskip("pyarrow") + ser = Series(["", "a"], dtype="string[pyarrow_numpy]") + assert ser.any() + assert not ser.all() + + ser = Series([None, "a"], dtype="string[pyarrow_numpy]") + assert ser.any() + assert not ser.all() + + ser = Series([None, ""], dtype="string[pyarrow_numpy]") + assert not ser.any() + assert not ser.all() + + ser = Series(["a", "b"], dtype="string[pyarrow_numpy]") + assert ser.any() + assert ser.all() + def test_timedelta64_analytics(self): # index min/max dti = date_range("2012-1-1", periods=3, freq="D")