From 6e49c2a79b4fc1259600f8f2826dbf181d75c519 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 20 Jan 2025 13:34:12 -0500 Subject: [PATCH 1/6] Add time_unit argument to CFTimeIndex.to_datetimeindex --- doc/whats-new.rst | 7 +++ xarray/coding/cftimeindex.py | 71 ++++++++++++++++------- xarray/tests/test_accessor_dt.py | 4 +- xarray/tests/test_cftime_offsets.py | 6 +- xarray/tests/test_cftimeindex.py | 52 +++++++++++------ xarray/tests/test_cftimeindex_resample.py | 6 +- xarray/tests/test_groupby.py | 4 +- xarray/tests/test_missing.py | 4 +- 8 files changed, 106 insertions(+), 48 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fe698bc358b..a48d003de69 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -54,6 +54,13 @@ New Features By `Kai Mühlbauer `_ and `Spencer Clark `_. - Improve the error message raised when no key is matching the available variables in a dataset. (:pull:`9943`) By `Jimmy Westling `_. +- Added a ``time_unit`` argument to :py:meth:`CFTimeIndex.to_datetimeindex`. + Note that in a future version of xarray, + :py:meth:`CFTimeIndex.to_datetimeindex` will return a microsecond-resolution + :py:class:`pandas.DatetimeIndex` instead of a nanosecond-resolution + :py:class:`pandas.DatetimeIndex` (:pull:`9965`). By `Spencer Clark + `_ and `Kai Mühlbauer + `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index bd5f51551c7..06f44399084 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -42,9 +42,8 @@ from __future__ import annotations import math -import warnings from datetime import timedelta -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Optional import numpy as np import pandas as pd @@ -58,7 +57,8 @@ ) from xarray.core.common import _contains_cftime_datetimes from xarray.core.options import OPTIONS -from xarray.core.utils import attempt_import, is_scalar +from xarray.core.types import PDDatetimeUnitOptions +from xarray.core.utils import attempt_import, emit_user_level_warning, is_scalar if TYPE_CHECKING: from xarray.coding.cftime_offsets import BaseCFTimeOffset @@ -239,6 +239,8 @@ class CFTimeIndex(pd.Index): cftime_range """ + _data: np.ndarray + year = _field_accessor("year", "The year of the datetime") month = _field_accessor("month", "The month of the datetime") day = _field_accessor("day", "The days of the datetime") @@ -544,14 +546,18 @@ def __rsub__(self, other): "that can be expressed at the nanosecond resolution." ) from err - def to_datetimeindex(self, unsafe=False): + def to_datetimeindex( + self, unsafe: bool = False, time_unit: Optional[PDDatetimeUnitOptions] = None + ) -> pd.DatetimeIndex: """If possible, convert this index to a pandas.DatetimeIndex. Parameters ---------- unsafe : bool - Flag to turn off warning when converting from a CFTimeIndex with - a non-standard calendar to a DatetimeIndex (default ``False``). + Flag to turn off calendar mismatch warnings (default ``False``). + time_unit : str + Time resolution of resulting DatetimeIndex. Can be one of `"s"`, + ``"ms"``, ``"us"``, or ``"ns"`` (default ``"ns"``). Returns ------- @@ -561,18 +567,20 @@ def to_datetimeindex(self, unsafe=False): ------ ValueError If the CFTimeIndex contains dates that are not possible in the - standard calendar or outside the nanosecond-precision range. + standard calendar or outside the range representable by the + specified ``time_unit``. Warns ----- RuntimeWarning - If converting from a non-standard calendar to a DatetimeIndex. + If converting from a non-standard calendar, or a Gregorian + calendar with dates prior to the reform (1582-10-15). Warnings -------- - Note that for non-standard calendars, this will change the calendar - type of the index. In that case the result of this method should be - used with caution. + Note that for non-proleptic Gregorian calendars, this will change the + calendar type of the index. In that case the result of this method + should be used with caution. Examples -------- @@ -580,26 +588,47 @@ def to_datetimeindex(self, unsafe=False): >>> times CFTimeIndex([2000-01-01 00:00:00, 2000-01-02 00:00:00], dtype='object', length=2, calendar='standard', freq=None) - >>> times.to_datetimeindex() - DatetimeIndex(['2000-01-01', '2000-01-02'], dtype='datetime64[us]', freq=None) + >>> times.to_datetimeindex(time_unit="ns") + DatetimeIndex(['2000-01-01', '2000-01-02'], dtype='datetime64[ns]', freq=None) """ if not self._data.size: return pd.DatetimeIndex([]) - # transform to us-resolution is needed for DatetimeIndex - nptimes = cftime_to_nptime(self, time_unit="us") + if time_unit is None: + emit_user_level_warning( + "In a future version of xarray to_datetimeindex will default " + "to returning a 'us'-resolution DatetimeIndex instead of a " + "'ns'-resolution DatetimeIndex. This warning can be silenced " + "by explicitly setting the time_unit of the index returned.", + FutureWarning, + ) + time_unit = "ns" + + nptimes = cftime_to_nptime(self, time_unit=time_unit) calendar = infer_calendar_name(self) if calendar not in _STANDARD_CALENDARS and not unsafe: - warnings.warn( + emit_user_level_warning( "Converting a CFTimeIndex with dates from a non-standard " - f"calendar, {calendar!r}, to a pandas.DatetimeIndex, which uses dates " - "from the standard calendar. This may lead to subtle errors " - "in operations that depend on the length of time between " - "dates.", + f"calendar, {calendar!r}, to a pandas.DatetimeIndex, which " + "uses dates from the standard calendar. This may lead to " + "subtle errors in operations that depend on the length of " + "time between dates.", RuntimeWarning, - stacklevel=2, ) + if calendar == "standard" and not unsafe: + reform_date = self.date_type(1582, 10, 15) + if self.min() < reform_date: + emit_user_level_warning( + "Converting a CFTimeIndex with dates from a Gregorian " + "calendar that fall before the reform date of 1582-10-15 " + "to a pandas.DatetimeIndex. During this time period the " + "Gregorian calendar and the proleptic Gregorian calendar " + "of the DatetimeIndex do not exactly align. This warning " + "can be silenced by setting unsafe=True.", + RuntimeWarning, + ) + return pd.DatetimeIndex(nptimes) def strftime(self, date_format): diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 64309966103..652b1752f12 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -519,7 +519,9 @@ def test_cftime_strftime_access(data) -> None: date_format = "%Y%m%d%H" result = data.time.dt.strftime(date_format) datetime_array = xr.DataArray( - xr.coding.cftimeindex.CFTimeIndex(data.time.values).to_datetimeindex(), + xr.coding.cftimeindex.CFTimeIndex(data.time.values).to_datetimeindex( + time_unit="ns" + ), name="stftime", coords=data.time.coords, dims=data.time.dims, diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 4db73048548..ee2f2f06c4f 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1672,7 +1672,7 @@ def test_new_to_legacy_freq_pd_freq_passthrough(freq, expected): ) def test_cftime_range_same_as_pandas(start, end, freq): result = date_range(start, end, freq=freq, calendar="standard", use_cftime=True) - result = result.to_datetimeindex() + result = result.to_datetimeindex(time_unit="ns") expected = date_range(start, end, freq=freq, use_cftime=False) np.testing.assert_array_equal(result, expected) @@ -1694,8 +1694,8 @@ def test_cftime_range_no_freq(start, end, periods): when freq is not provided, but start, end and periods are. """ # Generate date ranges using cftime_range - result = cftime_range(start=start, end=end, periods=periods) - result = result.to_datetimeindex() + cftimeindex = cftime_range(start=start, end=end, periods=periods) + result = cftimeindex.to_datetimeindex(time_unit="ns") expected = pd.date_range(start=start, end=end, periods=periods) np.testing.assert_array_equal(result, expected) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 8fc79a0cc53..6df6ebe748b 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -1219,43 +1219,59 @@ def test_strftime_of_cftime_array(calendar): @requires_cftime @pytest.mark.parametrize("calendar", _ALL_CALENDARS) @pytest.mark.parametrize("unsafe", [False, True]) -def test_to_datetimeindex(calendar, unsafe): +def test_to_datetimeindex(calendar, unsafe) -> None: index = xr.cftime_range("2000", periods=5, calendar=calendar) - expected = pd.date_range("2000", periods=5, unit="us") + expected = pd.date_range("2000", periods=5, unit="ns") if calendar in _NON_STANDARD_CALENDARS and not unsafe: with pytest.warns(RuntimeWarning, match="non-standard"): - result = index.to_datetimeindex() + result = index.to_datetimeindex(time_unit="ns") else: - result = index.to_datetimeindex(unsafe=unsafe) + result = index.to_datetimeindex(unsafe=unsafe, time_unit="ns") assert result.equals(expected) np.testing.assert_array_equal(result, expected) assert isinstance(result, pd.DatetimeIndex) +@requires_cftime +def test_to_datetimeindex_future_warning() -> None: + index = xr.cftime_range("2000", periods=5) + expected = pd.date_range("2000", periods=5, unit="ns") + with pytest.warns(FutureWarning, match="In a future version"): + result = index.to_datetimeindex() + assert result.equals(expected) + assert result.dtype == expected.dtype + + @requires_cftime @pytest.mark.parametrize("calendar", _ALL_CALENDARS) -def test_to_datetimeindex_out_of_range(calendar): +def test_to_datetimeindex_out_of_range(calendar) -> None: index = xr.cftime_range("0001", periods=5, calendar=calendar) - # todo: suggestion from code review: - # - still warn when converting from a non-standard calendar - # to a proleptic Gregorian calendar - # - also warn when converting from a Gregorian calendar - # to a proleptic Gregorian calendar when dates fall before the reform - if calendar in _NON_STANDARD_CALENDARS: - with pytest.warns(RuntimeWarning, match="non-standard"): - index.to_datetimeindex() + with pytest.raises(ValueError, match="0001"): + index.to_datetimeindex(time_unit="ns") + + +@requires_cftime +@pytest.mark.parametrize("unsafe", [False, True]) +def test_to_datetimeindex_gregorian_pre_reform(unsafe) -> None: + index = xr.cftime_range("1582", periods=5, calendar="gregorian") + if unsafe: + result = index.to_datetimeindex(time_unit="us", unsafe=unsafe) else: - index.to_datetimeindex() + with pytest.warns(RuntimeWarning, match="reform"): + result = index.to_datetimeindex(time_unit="us", unsafe=unsafe) + expected = pd.date_range("1582", periods=5, unit="us") + assert result.equals(expected) + assert result.dtype == expected.dtype @requires_cftime @pytest.mark.parametrize("calendar", ["all_leap", "360_day"]) -def test_to_datetimeindex_feb_29(calendar): +def test_to_datetimeindex_feb_29(calendar) -> None: index = xr.cftime_range("2001-02-28", periods=2, calendar=calendar) with pytest.raises(ValueError, match="29"): - index.to_datetimeindex() + index.to_datetimeindex(time_unit="ns") @pytest.mark.xfail(reason="fails on pandas main branch") @@ -1271,10 +1287,10 @@ def test_multiindex(): @pytest.mark.parametrize("method", ["floor", "ceil", "round"]) def test_rounding_methods_against_datetimeindex(freq, method): # for now unit="us" seems good enough - expected = pd.date_range("2000-01-02T01:03:51", periods=10, freq="1777s", unit="us") + expected = pd.date_range("2000-01-02T01:03:51", periods=10, freq="1777s", unit="ns") expected = getattr(expected, method)(freq) result = xr.cftime_range("2000-01-02T01:03:51", periods=10, freq="1777s") - result = getattr(result, method)(freq).to_datetimeindex() + result = getattr(result, method)(freq).to_datetimeindex(time_unit="ns") assert result.equals(expected) diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index 081970108a0..5510eaacf1f 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -97,7 +97,9 @@ def compare_against_pandas( ).mean() # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass result_cftimeindex["time"] = ( - result_cftimeindex.xindexes["time"].to_pandas_index().to_datetimeindex() + result_cftimeindex.xindexes["time"] + .to_pandas_index() + .to_datetimeindex(time_unit="ns") ) xr.testing.assert_identical(result_cftimeindex, result_datetimeindex) @@ -181,7 +183,7 @@ def test_calendars(calendar: str) -> None: # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass new_pd_index = da_cftime.xindexes["time"].to_pandas_index() assert isinstance(new_pd_index, CFTimeIndex) # shouldn't that be a pd.Index? - da_cftime["time"] = new_pd_index.to_datetimeindex() + da_cftime["time"] = new_pd_index.to_datetimeindex(time_unit="ns") xr.testing.assert_identical(da_cftime, da_datetime) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 3c7f46f5a02..d42f86f5ea6 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1868,7 +1868,7 @@ def test_resample( def resample_as_pandas(array, *args, **kwargs): array_ = array.copy(deep=True) if use_cftime: - array_["time"] = times.to_datetimeindex() + array_["time"] = times.to_datetimeindex(time_unit="ns") result = DataArray.from_series( array_.to_series().resample(*args, **kwargs).mean() ) @@ -2321,7 +2321,7 @@ def test_resample( def resample_as_pandas(ds, *args, **kwargs): ds_ = ds.copy(deep=True) if use_cftime: - ds_["time"] = times.to_datetimeindex() + ds_["time"] = times.to_datetimeindex(time_unit="ns") result = Dataset.from_dataframe( ds_.to_dataframe().resample(*args, **kwargs).mean() ) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index eb21cca0861..c1e2bb2ac85 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -609,7 +609,9 @@ def test_get_clean_interp_index_cf_calendar(cf_da, calendar): def test_get_clean_interp_index_dt(cf_da, calendar, freq): """In the gregorian case, the index should be proportional to normal datetimes.""" g = cf_da(calendar, freq=freq) - g["stime"] = xr.Variable(data=g.time.to_index().to_datetimeindex(), dims=("time",)) + g["stime"] = xr.Variable( + data=g.time.to_index().to_datetimeindex(time_unit="ns"), dims=("time",) + ) gi = get_clean_interp_index(g, "time") si = get_clean_interp_index(g, "time", use_coordinate="stime") From 09c0ae435a67578dbd27f5c07f170f554fa6ecf4 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 21 Jan 2025 21:29:06 -0500 Subject: [PATCH 2/6] Update xarray/tests/test_cftime_offsets.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kai Mühlbauer --- xarray/tests/test_cftime_offsets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index ee2f2f06c4f..086e187d2e6 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -1670,7 +1670,7 @@ def test_new_to_legacy_freq_pd_freq_passthrough(freq, expected): pytest.param("-1YE", marks=requires_pandas_3), ), ) -def test_cftime_range_same_as_pandas(start, end, freq): +def test_cftime_range_same_as_pandas(start, end, freq) -> None: result = date_range(start, end, freq=freq, calendar="standard", use_cftime=True) result = result.to_datetimeindex(time_unit="ns") expected = date_range(start, end, freq=freq, use_cftime=False) From 06e7c6a5cf8796b3dd07fe5e3b2338b9592c9ff6 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 21 Jan 2025 21:29:27 -0500 Subject: [PATCH 3/6] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kai Mühlbauer --- xarray/tests/test_groupby.py | 2 +- xarray/tests/test_missing.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index d42f86f5ea6..677854ca23a 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1865,7 +1865,7 @@ def test_resample( "2000-01-01", freq="6h", periods=10, use_cftime=use_cftime ) - def resample_as_pandas(array, *args, **kwargs): + def resample_as_pandas(array, *args, **kwargs) -> None: array_ = array.copy(deep=True) if use_cftime: array_["time"] = times.to_datetimeindex(time_unit="ns") diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index c1e2bb2ac85..d3eaa70f1df 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -606,7 +606,7 @@ def test_get_clean_interp_index_cf_calendar(cf_da, calendar): @requires_cftime @pytest.mark.parametrize("calendar", ["gregorian", "proleptic_gregorian"]) @pytest.mark.parametrize("freq", ["1D", "1ME", "1YE"]) -def test_get_clean_interp_index_dt(cf_da, calendar, freq): +def test_get_clean_interp_index_dt(cf_da, calendar, freq) -> None: """In the gregorian case, the index should be proportional to normal datetimes.""" g = cf_da(calendar, freq=freq) g["stime"] = xr.Variable( From 04f138ff69ce8d278953395af62bbd2e104481cf Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Tue, 21 Jan 2025 21:29:51 -0500 Subject: [PATCH 4/6] Update xarray/tests/test_cftimeindex.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kai Mühlbauer --- xarray/tests/test_cftimeindex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 6df6ebe748b..85622d54973 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -1285,7 +1285,7 @@ def test_multiindex(): @requires_cftime @pytest.mark.parametrize("freq", ["3663s", "33min", "2h"]) @pytest.mark.parametrize("method", ["floor", "ceil", "round"]) -def test_rounding_methods_against_datetimeindex(freq, method): +def test_rounding_methods_against_datetimeindex(freq, method) -> None: # for now unit="us" seems good enough expected = pd.date_range("2000-01-02T01:03:51", periods=10, freq="1777s", unit="ns") expected = getattr(expected, method)(freq) From 1375034e8f9be0a596254253884cb2dec6373176 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Mon, 27 Jan 2025 20:48:28 -0500 Subject: [PATCH 5/6] Apply Deepak's wording suggestion Co-authored-by: Deepak Cherian --- xarray/coding/cftimeindex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 06f44399084..34b266b9e2f 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -600,7 +600,7 @@ def to_datetimeindex( "In a future version of xarray to_datetimeindex will default " "to returning a 'us'-resolution DatetimeIndex instead of a " "'ns'-resolution DatetimeIndex. This warning can be silenced " - "by explicitly setting the time_unit of the index returned.", + "by explicitly passing the `time_unit` keyword argument.", FutureWarning, ) time_unit = "ns" From 79f744e3006cce792c42717cbe930303534b8d22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 29 Jan 2025 09:08:32 +0100 Subject: [PATCH 6/6] Update xarray/tests/test_groupby.py --- xarray/tests/test_groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 104061ea0b1..2287130ebe2 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1867,7 +1867,7 @@ def test_resample( "2000-01-01", freq="6h", periods=10, use_cftime=use_cftime ) - def resample_as_pandas(array, *args, **kwargs) -> None: + def resample_as_pandas(array, *args, **kwargs): array_ = array.copy(deep=True) if use_cftime: array_["time"] = times.to_datetimeindex(time_unit="ns")