Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -752,8 +752,11 @@ cpdef ndarray[object] ensure_string_array(
out = arr.astype(str).astype(object)
out[arr.isna()] = na_value
return out

arr = arr.to_numpy()
if hasattr(arr, "type"):
# pyarrow array
arr = np.array(arr)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Can we handle this at a higher level? 2. Why doesn't the pyarrow object's to_numpy work here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It sets zero_copy_only to True, which causes it to raise here.

We have to do a check like this somewhere as long as arrow is not a hard dependency; we can do it at a higher level as well.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved it to a higher level; not sure if it is better, though.

else:
arr = arr.to_numpy()
elif not util.is_array(arr):
arr = np.array(arr, dtype="object")

Expand Down
2 changes: 2 additions & 0 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False)
result = scalars._data
result = lib.ensure_string_array(result, copy=copy, convert_na_value=False)
return cls(pa.array(result, mask=na_values, type=pa.string()))
elif isinstance(scalars, (pa.Array, pa.ChunkedArray)):
return cls(pc.cast(scalars, pa.string()))

# convert non-na-likes to str
result = lib.ensure_string_array(scalars, copy=copy)
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2353,6 +2353,14 @@ def test_concat_empty_arrow_backed_series(dtype):
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("dtype", ["string", "string[pyarrow]"])
def test_series_from_string_array(dtype):
    # Constructing a Series directly from a raw pyarrow array with a string
    # dtype should give the same result as first wrapping that array in an
    # ArrowExtensionArray.
    arr = pa.array("the quick brown fox".split())
    ser = pd.Series(arr, dtype=dtype)
    expected = pd.Series(ArrowExtensionArray(arr), dtype=dtype)
    tm.assert_series_equal(ser, expected)


# _data was renamed to _pa_data
class OldArrowExtensionArray(ArrowExtensionArray):
def __getstate__(self):
Expand Down