From 94e7dd03e2e094b532947fc3528d79e61b92aa20 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 28 Nov 2020 10:36:46 -0800 Subject: [PATCH 1/4] BUG: Categorical[dt64tz].to_numpy() losing tz --- pandas/conftest.py | 10 ++++++++++ pandas/core/arrays/categorical.py | 12 +++++------- pandas/tests/base/test_conversion.py | 20 ++++++++++++++++++-- 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index a0ec6f96042fc..c368cdb07d834 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -321,6 +321,16 @@ def index_or_series(request): index_or_series2 = index_or_series +@pytest.fixture( + params=[pd.Index, pd.Series, pd.array], ids=["index", "series", "array"] +) +def index_or_series_or_array(request): + """ + Fixture to parametrize over Index, Series, and ExtensionArray + """ + return request.param + + @pytest.fixture def dict_subclass(): """ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index fe66aae23f510..666b16f974971 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1269,15 +1269,13 @@ def __array__(self, dtype=None) -> np.ndarray: if dtype==None (default), the same dtype as categorical.categories.dtype. """ - ret = take_1d(self.categories.values, self._codes) + ret = take_1d(self.categories._values, self._codes) if dtype and not is_dtype_equal(dtype, self.categories.dtype): return np.asarray(ret, dtype) - if is_extension_array_dtype(ret): - # When we're a Categorical[ExtensionArray], like Interval, - # we need to ensure __array__ get's all the way to an - # ndarray. - ret = np.asarray(ret) - return ret + # When we're a Categorical[ExtensionArray], like Interval, + # we need to ensure __array__ get's all the way to an + # ndarray. + return np.asarray(ret) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): # for binary ops, use our custom dunder methods diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index a6fdb82e48197..668954a3f4a0b 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -316,18 +316,34 @@ def test_array_multiindex_raises(): TimedeltaArray(np.array([0, 3600000000000], dtype="i8"), freq="H"), np.array([0, 3600000000000], dtype="m8[ns]"), ), + # GH#26406 tz is preserved in Categorical[dt64tz] + ( + pd.Categorical(pd.date_range("2016-01-01", periods=2, tz="US/Pacific")), + np.array( + [ + Timestamp("2016-01-01", tz="US/Pacific"), + Timestamp("2016-01-02", tz="US/Pacific"), + ] + ), + ), ], ) -def test_to_numpy(array, expected, index_or_series): - box = index_or_series +def test_to_numpy(array, expected, index_or_series_or_array): + box = index_or_series_or_array thing = box(array) if array.dtype.name in ("Int64", "Sparse[int64, 0]") and box is pd.Index: pytest.skip(f"No index type for {array.dtype}") + if array.dtype.name == "int64" and box is pd.array: + pytest.xfail("thing is Int64 and to_numpy() returns object") + result = thing.to_numpy() tm.assert_numpy_array_equal(result, expected) + result = np.asarray(thing) + tm.assert_numpy_array_equal(result, expected) + @pytest.mark.parametrize("as_series", [True, False]) @pytest.mark.parametrize( From a793594072609a929d828368fd45c06a67f8cbd2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 28 Nov 2020 12:41:43 -0800 Subject: [PATCH 2/4] Update pandas/core/arrays/categorical.py Co-authored-by: gfyoung --- pandas/core/arrays/categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 666b16f974971..3995e7b251184 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1273,7 +1273,7 @@ def __array__(self, dtype=None) -> np.ndarray: if dtype and not is_dtype_equal(dtype, self.categories.dtype): return np.asarray(ret, dtype) # When we're a Categorical[ExtensionArray], like Interval, - # we need to ensure __array__ get's all the way to an + # we need to ensure __array__ gets all the way to an # ndarray. return np.asarray(ret) From e1081f8847b758bd7150eb1e740a82e4bc8d22ea Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 29 Nov 2020 08:13:56 -0800 Subject: [PATCH 3/4] Avoid FutureWarning --- pandas/core/series.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index d493ac0a8c051..298e5fcc1e575 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -84,7 +84,13 @@ from pandas.core.generic import NDFrame from pandas.core.indexers import deprecate_ndim_indexing, unpack_1tuple from pandas.core.indexes.accessors import CombinedDatetimelikeProperties -from pandas.core.indexes.api import Float64Index, Index, MultiIndex, ensure_index +from pandas.core.indexes.api import ( + CategoricalIndex, + Float64Index, + Index, + MultiIndex, + ensure_index, +) import pandas.core.indexes.base as ibase from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.period import PeriodIndex @@ -412,7 +418,13 @@ def _set_axis(self, axis: int, labels, fastpath: bool = False) -> None: labels = ensure_index(labels) if labels._is_all_dates: - if not isinstance(labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): + deep_labels = labels + if isinstance(labels, CategoricalIndex): + deep_labels = labels.categories + + if not isinstance( + deep_labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex) + ): try: labels = DatetimeIndex(labels) # need to set here because we changed the index From 2771bf135934861ac515cdc5515df5de6c18ea01 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 29 Nov 2020 12:20:49 -0800 Subject: [PATCH 4/4] whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index bb06bcc9b5aa8..3fab4850dd1ec 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -533,7 +533,7 @@ Categorical - Bug in :meth:`Categorical.__setitem__` that incorrectly raised when trying to set a tuple value (:issue:`20439`) - Bug in :meth:`CategoricalIndex.equals` incorrectly casting non-category entries to ``np.nan`` (:issue:`37667`) - Bug in :meth:`CategoricalIndex.where` incorrectly setting non-category entries to ``np.nan`` instead of raising ``TypeError`` (:issue:`37977`) -- +- Bug in :meth:`Categorical.to_numpy` and ``np.array(categorical)`` with timezone-aware ``datetime64`` categories incorrectly dropping the timezone information instead of casting to object dtype (:issue:`38136`) Datetimelike ^^^^^^^^^^^^