Skip to content
36 changes: 36 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4938,6 +4938,26 @@ def get_indexer_for(self, target, **kwargs):
indexer, _ = self.get_indexer_non_unique(target)
return indexer

def _get_indexer_non_comparable(self, target, method, unique: bool = True):
"""
If we have non-comparable dtypes, there will never be any matches.

For get_indexer calls with a method, this is an _inequality_ check
which raises TypeError.
"""
if method is not None:
other = self._get_other_deep(target)
raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}")

no_matches = -1 * np.ones(target.shape, dtype=np.intp)
if unique:
# This is for get_indexer
return no_matches
else:
# This is for get_indexer_non_unique
missing = np.arange(len(target), dtype=np.intp)
return no_matches, missing

@property
def _index_as_unique(self):
"""
Expand Down Expand Up @@ -4973,6 +4993,22 @@ def _maybe_promote(self, other: "Index"):

return self, other

def _get_other_deep(self, other: "Index") -> "Index":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should just be a function, no? its also a funny name. I think we have other similar things (e.g. this look like what .to_dense() does but for dtypes and not values).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated to _unpack_nested_dtypes function

dtype = other.dtype
if is_categorical_dtype(dtype):
# If there is ever a SparseIndex, this could get dispatched
# here too.
return dtype.categories
return other

def _should_compare(self, other: "Index") -> bool:
"""
Check if `self == other` can ever have non-False entries.
"""
other = self._get_other_deep(other)
dtype = other.dtype
return self._is_comparable_dtype(dtype) or is_object_dtype(dtype)

def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
"""
Can we compare values of the given dtype to our own?
Expand Down
9 changes: 3 additions & 6 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,13 +452,10 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False)
def get_indexer(self, target, method=None, limit=None, tolerance=None):
target = ensure_index(target)

if isinstance(target, PeriodIndex):
if not self._is_comparable_dtype(target.dtype):
# i.e. target.freq != self.freq
# No matches
no_matches = -1 * np.ones(self.shape, dtype=np.intp)
return no_matches
if not self._should_compare(target):
return self._get_indexer_non_comparable(target, method, unique=True)

if isinstance(target, PeriodIndex):
target = target._get_engine_target() # i.e. target.asi8
self_index = self._int64index
else:
Expand Down
53 changes: 53 additions & 0 deletions pandas/tests/indexes/period/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,28 @@
)
import pandas._testing as tm

dti4 = date_range("2016-01-01", periods=4)
dti = dti4[:-1]
rng = pd.Index(range(3))


@pytest.fixture(
params=[
dti,
dti.tz_localize("UTC"),
dti.to_period("W"),
dti - dti[0],
rng,
pd.Index([1, 2, 3]),
pd.Index([2.0, 3.0, 4.0]),
pd.Index([4, 5, 6], dtype="u8"),
pd.IntervalIndex.from_breaks(dti4),
]
)
def non_comparable(request):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

non_comparable_idx

# All have length 3
return request.param


class TestGetItem:
def test_ellipsis(self):
Expand Down Expand Up @@ -438,6 +460,37 @@ def test_get_indexer_mismatched_dtype(self):
result = pi.get_indexer_non_unique(pi2)[0]
tm.assert_numpy_array_equal(result, expected)

def test_get_indexer_mismatched_dtype_different_length(self, non_comparable):
# without method we arent checking inequalities, so get all-missing
# but do not raise
dti = date_range("2016-01-01", periods=3)
pi = dti.to_period("D")

other = non_comparable

res = pi[:-1].get_indexer(other)
expected = -np.ones(other.shape, dtype=np.intp)
tm.assert_numpy_array_equal(res, expected)

@pytest.mark.parametrize("method", ["pad", "backfill", "nearest"])
def test_get_indexer_mismatched_dtype_with_method(self, non_comparable, method):
dti = date_range("2016-01-01", periods=3)
pi = dti.to_period("D")

other = non_comparable

msg = re.escape(f"Cannot compare dtypes {pi.dtype} and {other.dtype}")
with pytest.raises(TypeError, match=msg):
pi.get_indexer(other, method=method)

for dtype in ["object", "category"]:
other2 = other.astype(dtype)
if dtype == "object" and isinstance(other, PeriodIndex):
continue
# For object dtype we are liable to get a different exception message
with pytest.raises(TypeError):
pi.get_indexer(other2, method=method)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback notice in these cases we are currently raising in master bc the scalar comparisons raise

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this new?

get_indexer is not super public but i believe it will never raise

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're right that get_indexer with method=None should never raise (maybe with tzawareness corner cases), but with method="ffill" the following raises on master:

dti = pd.date_range("2016-01-01", periods=3)
rng = pd.Index(range(5))

>>> dti.get_indexer(rng, method="ffill")
TypeError: '<' not supported between instances of 'int' and 'Timestamp'


def test_get_indexer_non_unique(self):
# GH 17717
p1 = Period("2017-09-02")
Expand Down