Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
is_datetime64tz_dtype,
is_dtype_equal,
is_integer,
is_list_like,
is_object_dtype,
is_scalar,
is_string_dtype,
Expand Down Expand Up @@ -925,6 +926,14 @@ def __getitem__(
indices = np.arange(len(self), dtype=np.int32)[key]
return self.take(indices)

elif not is_list_like(key):
# e.g. "foo" or 2.5
# exception message copied from numpy
raise IndexError(
r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis "
r"(`None`) and integer or boolean arrays are valid indices"
)

else:
# TODO: I think we can avoid densifying when masking a
# boolean SparseArray with another. Need to look at the
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,13 @@ def __getitem__(
elif item[1] is Ellipsis:
item = item[0]

if is_scalar(item) and not is_integer(item):
# e.g. "foo" or 2.5
# exception message copied from numpy
raise IndexError(
r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis "
r"(`None`) and integer or boolean arrays are valid indices"
)
# We are not an array indexer, so maybe e.g. a slice or integer
# indexer. We dispatch to pyarrow.
value = self._data[item]
Expand Down Expand Up @@ -443,6 +450,11 @@ def _cmp_method(self, other, op):
# TODO(ARROW-9429): Add a .to_numpy() to ChunkedArray
return BooleanArray._from_sequence(result.to_pandas().values)

def insert(self, loc: int, item):
    """
    Insert ``item`` at position ``loc`` after validating its type.

    Only ``str`` instances and ``libmissing.NA`` are accepted; any other
    value raises ``TypeError`` before delegating to the parent ``insert``.
    """
    is_valid = isinstance(item, str) or item is libmissing.NA
    if not is_valid:
        raise TypeError("Scalar must be NA or str")
    return super().insert(loc, item)

def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None:
"""Set one or more values inplace.

Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/extension/base/getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,33 @@ def test_getitem_scalar(self, data):
result = pd.Series(data)[0]
assert isinstance(result, data.dtype.type)

def test_getitem_invalid(self, data):
# TODO: box over scalar, [scalar], (scalar,)?

msg = (
r"only integers, slices \(`:`\), ellipsis \(`...`\), numpy.newaxis "
r"\(`None`\) and integer or boolean arrays are valid indices"
)
with pytest.raises(IndexError, match=msg):
data["foo"]
with pytest.raises(IndexError, match=msg):
data[2.5]

ub = len(data)
msg = "|".join(
[
"list index out of range", # json
"index out of bounds", # pyarrow
"Out of bounds access", # Sparse
f"index {ub+1} is out of bounds for axis 0 with size {ub}",
f"index -{ub+1} is out of bounds for axis 0 with size {ub}",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  with pytest.raises(IndexError, match=msg):

E AssertionError: Regex pattern 'list index out of range|index out of bounds|Out of bounds access|index 101 is out of bounds for axis 0 with size 100|index -101 is out of bounds for axis 0 with size 100' does not match 'loc must be an integer between -100 and 100'.

I think these assertions are failing on the 3.10 builds.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be addressed by the just-pushed commit in #44242.

]
)
with pytest.raises(IndexError, match=msg):
data[ub + 1]
with pytest.raises(IndexError, match=msg):
data[-ub - 1]

def test_getitem_scalar_na(self, data_missing, na_cmp, na_value):
result = data_missing[0]
assert na_cmp(result, na_value)
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,3 +367,11 @@ def test_delitem_series(self, data):
expected = ser[taker]
del ser[1]
self.assert_series_equal(ser, expected)

def test_setitem_invalid(self, data, invalid_scalar):
    """Setting ``invalid_scalar`` at one position or over a full slice raises."""
    # messages vary by subclass, so we do not test it
    msg = ""
    expected_errors = (ValueError, TypeError)

    # 0 exercises scalar assignment; slice(None) is the ``data[:]`` case
    for key in (0, slice(None)):
        with pytest.raises(expected_errors, match=msg):
            data[key] = invalid_scalar
12 changes: 11 additions & 1 deletion pandas/tests/extension/json/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@
from pandas._typing import type_t

from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
from pandas.core.dtypes.common import pandas_dtype
from pandas.core.dtypes.common import (
is_list_like,
pandas_dtype,
)

import pandas as pd
from pandas.api.extensions import (
Expand Down Expand Up @@ -103,6 +106,13 @@ def __getitem__(self, item):
elif isinstance(item, slice):
# slice
return type(self)(self.data[item])
elif not is_list_like(item):
# e.g. "foo" or 2.5
# exception message copied from numpy
raise IndexError(
r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis "
r"(`None`) and integer or boolean arrays are valid indices"
)
else:
item = pd.api.indexers.check_array_indexer(self, item)
if is_bool_dtype(item.dtype):
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,11 @@ def test_concat(self, data, in_frame):


class TestSetitem(BaseNumPyTests, base.BaseSetitemTests):
@skip_nested
def test_setitem_invalid(self, data, invalid_scalar):
    """Delegate to the inherited ``test_setitem_invalid`` check."""
    # object dtype can hold anything, so doesn't raise
    super().test_setitem_invalid(data=data, invalid_scalar=invalid_scalar)

@skip_nested
def test_setitem_sequence_broadcasts(self, data, box_in_series):
# ValueError: cannot set using a list-like indexer with a different
Expand Down
7 changes: 0 additions & 7 deletions pandas/tests/extension/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,13 +160,6 @@ def test_value_counts(self, all_data, dropna):
def test_value_counts_with_normalize(self, data):
pass

def test_insert_invalid(self, data, invalid_scalar, request):
    """Run the inherited invalid-insert check, marked xfail for pyarrow storage."""
    uses_pyarrow = data.dtype.storage == "pyarrow"
    if uses_pyarrow:
        request.node.add_marker(
            pytest.mark.xfail(reason="casts invalid_scalar to string")
        )

    super().test_insert_invalid(data, invalid_scalar)


class TestCasting(base.BaseCastingTests):
pass
Expand Down
26 changes: 26 additions & 0 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,32 @@ def test_insert_base(self, index):
# test 0th element
assert index[0:4].equals(result.insert(0, index[0]))

def test_insert_out_of_bounds(self, index):
    """Inserting at a non-integer or out-of-range ``loc`` raises."""
    # TypeError/IndexError matches what np.insert raises in these cases
    size = len(index)

    # A float loc gives a different error depending on whether index is empty
    if size == 0:
        err = IndexError
        msg = "index 0 is out of bounds for axis 0 with size 0"
    else:
        err = TypeError
        msg = "slice indices must be integers or None or have an __index__ method"
    with pytest.raises(err, match=msg):
        index.insert(0.5, "foo")

    out_of_bounds_patterns = [
        r"index -?\d+ is out of bounds for axis 0 with size \d+",
        "loc must be an integer between",
    ]
    msg = "|".join(out_of_bounds_patterns)

    # One past the end, then one before the most negative valid position
    for loc in (size + 1, -size - 1):
        with pytest.raises(IndexError, match=msg):
            index.insert(loc, 1)

def test_delete_base(self, index):
if not len(index):
return
Expand Down