Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1162,6 +1162,7 @@ Offsets
- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operatons (:issue:`14774`)
- Bug in :class:`DateOffset` where keyword arguments ``week`` and ``milliseconds`` were accepted and ignored. Passing these will now raise ``ValueError`` (:issue:`19398`)
- Bug in adding :class:`DateOffset` with :class:`DataFrame` or :class:`PeriodIndex` incorrectly raising ``TypeError`` (:issue:`23215`)
- Bug in comparing :class:`DateOffset` objects with non-DateOffset objects, particularly strings, raising ``ValueError`` instead of returning ``False`` for equality checks and ``True`` for not-equal checks (:issue:`23524`)

Numeric
^^^^^^^
Expand Down Expand Up @@ -1356,3 +1357,5 @@ Other
- :meth:`~pandas.io.formats.style.Styler.bar` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` and setting clipping range with ``vmin`` and ``vmax`` (:issue:`21548` and :issue:`21526`). ``NaN`` values are also handled properly.
- Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`)
- Bug in :meth:`DataFrame.combine_first` in which column types were unexpectedly converted to float (:issue:`20699`)
- Bug in :class:`Index` where passing a timezone-aware :class:`DatetimeIndex` and `dtype=object` would incorrectly raise a ``ValueError`` (:issue:`23524`)
- Bug in :class:`DatetimeIndex` where calling `np.array(dtindex, dtype=object)` would incorrectly return an array of ``long`` objects (:issue:`23524`)
7 changes: 5 additions & 2 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,11 @@ class _BaseOffset(object):

def __eq__(self, other):
if is_string_object(other):
other = to_offset(other)

try:
other = to_offset(other)
except ValueError:
# e.g. "infer"
return False
try:
return self._params == other._params
except AttributeError:
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,16 @@ def _resolution(self):
# ----------------------------------------------------------------
# Array-like Methods

def __array__(self, dtype=None):
if is_object_dtype(dtype):
return np.array(list(self), dtype=object)
elif dtype == 'i8':
return self.asi8
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can remove this elif branch. Numpy will afterwards convert the M8[ns] data to int, and in that way ensure the semantics of np.asarray regarding copy/no copy is followed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbrockmendel I opened #23593, a PR doing the __array__ for all datetimelike EAs, not only DatetimeArray (but so there is a bit of overlap with this PR)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great, I'll take a look at 23593.


# TODO: warn that dtype is not used?
# warn that conversion may be lossy?
return self._data.view(np.ndarray) # follow Index.__array__
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Other question: what is the .view(np.ndarray) part doing if it is already an array? Can we remove it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It can probably be removed; this is taken directly from the Index.__array__ implementation, so I think the maybe-removing this should be done at the same time those methods are overhauled (ill be opening an Issue shortly)


def __iter__(self):
"""
Return an iterator over the boxed values
Expand Down
12 changes: 9 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,11 +301,17 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
(dtype is not None and is_datetime64_any_dtype(dtype)) or
'tz' in kwargs):
from pandas import DatetimeIndex
result = DatetimeIndex(data, copy=copy, name=name,
dtype=dtype, **kwargs)

if dtype is not None and is_dtype_equal(_o_dtype, dtype):
return Index(result.to_pydatetime(), dtype=_o_dtype)
# GH#23524 passing `dtype=object` to DatetimeIndex is invalid,
# will raise in the where `data` is already tz-aware. So
# we leave it out of this step and cast to object-dtype after
# the DatetimeIndex construction.
result = DatetimeIndex(data, copy=copy, name=name, **kwargs)
return Index(list(result), dtype=_o_dtype)
else:
result = DatetimeIndex(data, copy=copy, name=name,
dtype=dtype, **kwargs)
return result

elif (is_timedelta64_dtype(data) or
Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,31 @@ def timedelta_index(request):

class TestDatetimeArray(object):

def test_array_object_dtype(self, tz_naive_fixture):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It think it would be good to add such a test to the base extension tests as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know those tests well enough to have an informed opinion. AFAIK ExtensionArray doesn't implement __array__, so it isn't clear that this is supported in the general case.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

EA implements __iter__, which should be sufficient.

This test would be slightly opinionated for a base test, in case an EA wants to be converted to a specific NumPy type, but I think it's OK.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we have base.interface.BaseInterfaceTests.test_array_interface which checks

    def test_array_interface(self, data):
        result = np.array(data)
        assert result[0] == data[0]

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, yes, that's already a generic test. OK, since that does not actually test the return dtype, it's good to have more explicit tests here.

Should we expect from EA that np.array(EA, dtype=object) always works (returns an object array of scalars)?
That seems like an OK assumption to me, since this already happens if you don't implement __array__, so we can expect this as well if the EA author implements a custom __array__ I think.

# GH#23524
tz = tz_naive_fixture
dti = pd.date_range('2016-01-01', periods=3, tz=tz)
arr = DatetimeArrayMixin(dti)

expected = np.array(list(dti))

result = np.array(arr, dtype=object)
tm.assert_numpy_array_equal(result, expected)

# also test the DatetimeIndex method while we're at it
result = np.array(dti, dtype=object)
tm.assert_numpy_array_equal(result, expected)

def test_array(self, tz_naive_fixture):
# GH#23524
tz = tz_naive_fixture
dti = pd.date_range('2016-01-01', periods=3, tz=tz)
arr = DatetimeArrayMixin(dti)

expected = dti.asi8.view('M8[ns]')
result = np.array(arr)
tm.assert_numpy_array_equal(result, expected)

def test_from_dti(self, tz_naive_fixture):
tz = tz_naive_fixture
dti = pd.date_range('2016-01-01', periods=3, tz=tz)
Expand Down
12 changes: 10 additions & 2 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def test_construction_list_tuples_nan(self, na_value, vtype):
@pytest.mark.parametrize("cast_as_obj", [True, False])
@pytest.mark.parametrize("index", [
pd.date_range('2015-01-01 10:00', freq='D', periods=3,
tz='US/Eastern'), # DTI with tz
tz='US/Eastern', name='Green Eggs & Ham'), # DTI with tz
pd.date_range('2015-01-01 10:00', freq='D', periods=3), # DTI no tz
pd.timedelta_range('1 days', freq='D', periods=3), # td
pd.period_range('2015-01-01', freq='D', periods=3) # period
Expand All @@ -145,8 +145,16 @@ def test_constructor_from_index_dtlike(self, cast_as_obj, index):

tm.assert_index_equal(result, index)

if isinstance(index, pd.DatetimeIndex) and hasattr(index, 'tz'):
if isinstance(index, pd.DatetimeIndex):
assert result.tz == index.tz
if cast_as_obj:
# GH#23524 check that Index(dti, dtype=object) does not
# incorrectly raise ValueError, and that nanoseconds are not
# dropped
index += pd.Timedelta(nanoseconds=50)
result = pd.Index(index, dtype=object)
assert result.dtype == np.object_
assert list(result) == list(index)

@pytest.mark.parametrize("index,has_tz", [
(pd.date_range('2015-01-01 10:00', freq='D', periods=3,
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/tseries/offsets/test_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,19 @@ def testMult2(self):
assert self.d + (-5 * self._offset(-10)) == self.d + self._offset(50)
assert self.d + (-3 * self._offset(-2)) == self.d + self._offset(6)

def test_compare_str(self):
# GH#23524
# comparing to strings that cannot be cast to DateOffsets should
# not raise for __eq__ or __ne__
if self._offset is None:
return
off = self._get_offset(self._offset)

assert not off == "infer"
assert off != "foo"
# Note: inequalities are only implemented for Tick subclasses;
# tests for this are in test_ticks


class TestCommon(Base):
# exected value created by Base._get_offset
Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/tseries/offsets/test_ticks.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,3 +267,25 @@ def test_compare_ticks(cls):
assert cls(4) > three
assert cls(3) == cls(3)
assert cls(3) != cls(4)


@pytest.mark.parametrize('cls', tick_classes)
def test_compare_ticks_to_strs(cls):
# GH#23524
off = cls(19)

# These tests should work with any strings, but we particularly are
# interested in "infer" as that comparison is convenient to make in
# Datetime/Timedelta Array/Index constructors
assert not off == "infer"
assert not "foo" == off

for left, right in [("infer", off), (off, "infer")]:
with pytest.raises(TypeError):
left < right
with pytest.raises(TypeError):
left <= right
with pytest.raises(TypeError):
left > right
with pytest.raises(TypeError):
left >= right
27 changes: 20 additions & 7 deletions pandas/tseries/offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -2199,9 +2199,18 @@ def apply_index(self, i):


def _tick_comp(op):
assert op not in [operator.eq, operator.ne]

def f(self, other):
return op(self.delta, other.delta)
try:
return op(self.delta, other.delta)
except AttributeError:
# comparing with a non-Tick object
raise TypeError("Invalid comparison between {cls} and {typ}"
.format(cls=type(self).__name__,
typ=type(other).__name__))

f.__name__ = '__{opname}__'.format(opname=op.__name__)
return f


Expand All @@ -2220,8 +2229,6 @@ def __init__(self, n=1, normalize=False):
__ge__ = _tick_comp(operator.ge)
__lt__ = _tick_comp(operator.lt)
__le__ = _tick_comp(operator.le)
__eq__ = _tick_comp(operator.eq)
__ne__ = _tick_comp(operator.ne)

def __add__(self, other):
if isinstance(other, Tick):
Expand All @@ -2242,8 +2249,11 @@ def __add__(self, other):
def __eq__(self, other):
if isinstance(other, compat.string_types):
from pandas.tseries.frequencies import to_offset

other = to_offset(other)
try:
other = to_offset(other)
except ValueError:
# e.g. "infer"
return False

if isinstance(other, Tick):
return self.delta == other.delta
Expand All @@ -2258,8 +2268,11 @@ def __hash__(self):
def __ne__(self, other):
if isinstance(other, compat.string_types):
from pandas.tseries.frequencies import to_offset

other = to_offset(other)
try:
other = to_offset(other)
except ValueError:
# e.g. "infer"
return True

if isinstance(other, Tick):
return self.delta != other.delta
Expand Down