diff --git a/doc/time-series.rst b/doc/time-series.rst index 32c6b581aa4..49d23634694 100644 --- a/doc/time-series.rst +++ b/doc/time-series.rst @@ -300,13 +300,19 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: da.differentiate('time') -- And serialization: +- Serialization: .. ipython:: python da.to_netcdf('example-no-leap.nc') xr.open_dataset('example-no-leap.nc') +- And resampling along the time dimension for data indexed by a :py:class:`~xarray.CFTimeIndex`: + +.. ipython:: python + + da.resample(time='81T', closed='right', label='right', base=3).mean() + .. note:: While much of the time series functionality that is possible for standard @@ -314,17 +320,14 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: still some remaining important features that have yet to be implemented, for example: - - Resampling along the time dimension for data indexed by a - :py:class:`~xarray.CFTimeIndex` (:issue:`2191`, :issue:`2458`) - Built-in plotting of data with :py:class:`cftime.datetime` coordinate axes (:issue:`2164`). For some use-cases it may still be useful to convert from a :py:class:`~xarray.CFTimeIndex` to a :py:class:`pandas.DatetimeIndex`, - despite the difference in calendar types (e.g. to allow the use of some - forms of resample with non-standard calendars). The recommended way of - doing this is to use the built-in - :py:meth:`~xarray.CFTimeIndex.to_datetimeindex` method: + despite the difference in calendar types. The recommended way of doing this + is to use the built-in :py:meth:`~xarray.CFTimeIndex.to_datetimeindex` + method: .. 
ipython:: python :okwarning: @@ -334,8 +337,7 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: da datetimeindex = da.indexes['time'].to_datetimeindex() da['time'] = datetimeindex - da.resample(time='Y').mean('time') - + However in this case one should use caution to only perform operations which do not depend on differences between dates (e.g. differentiation, interpolation, or upsampling with resample), as these could introduce subtle diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 184cee05ae2..52b7df2cb00 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -43,6 +43,10 @@ Enhancements report showing what exactly differs between the two objects (dimensions / coordinates / variables / attributes) (:issue:`1507`). By `Benoit Bovy `_. +- Resampling of standard and non-standard calendars indexed by + :py:class:`~xarray.CFTimeIndex` is now possible. (:issue:`2191`). + By `Jwen Fai Low `_ and + `Spencer Clark `_. Bug fixes ~~~~~~~~~ diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index a373aeff747..b9b9ae0e2df 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -75,6 +75,7 @@ def get_date_type(calendar): class BaseCFTimeOffset(object): _freq = None # type: ClassVar[str] + _day_option = None def __init__(self, n=1): if not isinstance(n, int): @@ -151,6 +152,53 @@ def __str__(self): def __repr__(self): return str(self) + def _get_offset_day(self, other): + # subclass must implement `_day_option`; calling from the base class + # will raise NotImplementedError. + return _get_day_of_month(other, self._day_option) + + +def _is_normalized(datetime): + if (datetime.hour != 0 or datetime.minute != 0 or datetime.second != 0 or + datetime.microsecond != 0): + return False + return True + + +def _get_day_of_month(other, day_option): + """Find the day in `other`'s month that satisfies a DateOffset's onOffset + policy, as described by the `day_opt` argument. 
+ + Parameters + ---------- + other : cftime.datetime + day_option : 'start', 'end', or int + 'start': returns 1 + 'end': returns last day of the month + int: returns `day_option`, or the last day of `other`'s month if the + value exceeds that month's number of days. + + Returns + ------- + day_of_month : int + + """ + + if day_option == 'start': + return 1 + elif day_option == 'end': + days_in_month = _days_in_month(other) + return days_in_month + elif isinstance(day_option, np.integer): + days_in_month = _days_in_month(other) + return min(day_option, days_in_month) + elif day_option is None: + # Note: unlike `_shift_month`, get_day_of_month does not + # allow day_option = None + raise NotImplementedError + else: + raise ValueError(day_option) + def _days_in_month(date): """The number of days in the month of the given date""" @@ -186,7 +234,7 @@ def _adjust_n_years(other, n, month, reference_day): return n -def _shift_months(date, months, day_option='start'): +def _shift_month(date, months, day_option='start'): """Shift the date to a month start or end a given number of months away. """ delta_year = (date.month + months) // 12 @@ -211,12 +259,53 @@ def _shift_months(date, months, day_option='start'): return date.replace(year=year, month=month, day=day, dayofwk=-1) +def roll_qtrday(other, n, month, day_option, modby=3): + """Possibly increment or decrement the number of periods to shift + based on rollforward/rollbackward conventions. + + Parameters + ---------- + other : cftime.datetime + n : number of periods to increment, before adjusting for rolling + month : int reference month giving the first month of the year + day_option : 'start', 'end', 'business_start', 'business_end', or int + The convention to use in finding the day in a given month against + which to compare for rollforward/rollbackward decisions. 
+ modby : int 3 for quarters, 12 for years + + Returns + ------- + n : int number of periods to increment + + See Also + -------- + get_day_of_month : Find the day in a month provided an offset. + """ + + months_since = other.month % modby - month % modby + + if n > 0: + if months_since < 0 or ( + months_since == 0 and + other.day < _get_day_of_month(other, day_option)): + # pretend to roll back if on same month but + # before compare_day + n -= 1 + else: + if months_since > 0 or ( + months_since == 0 and + other.day > _get_day_of_month(other, day_option)): + # make sure to roll forward, so negate + n += 1 + return n + + class MonthBegin(BaseCFTimeOffset): _freq = 'MS' def __apply__(self, other): n = _adjust_n_months(other.day, self.n, 1) - return _shift_months(other, n, 'start') + return _shift_month(other, n, 'start') def onOffset(self, date): """Check if the given date is in the set of possible dates created @@ -229,7 +318,7 @@ class MonthEnd(BaseCFTimeOffset): def __apply__(self, other): n = _adjust_n_months(other.day, self.n, _days_in_month(other)) - return _shift_months(other, n, 'end') + return _shift_month(other, n, 'end') def onOffset(self, date): """Check if the given date is in the set of possible dates created @@ -253,6 +342,121 @@ def onOffset(self, date): } +class QuarterOffset(BaseCFTimeOffset): + """Quarter representation copied off of pandas/tseries/offsets.py + """ + _freq = None # type: ClassVar[str] + _default_month = None # type: ClassVar[int] + + def __init__(self, n=1, normalize=False, month=None): + BaseCFTimeOffset.__init__(self, n) + self.normalize = normalize + if month is None: + self.month = self._default_month + else: + self.month = month + if not isinstance(self.month, int): + raise TypeError("'self.month' must be an integer value between 1 " + "and 12. Instead, it was set to a value of " + "{!r}".format(self.month)) + elif not (1 <= self.month <= 12): + raise ValueError("'self.month' must be an integer value between 1 " + "and 12. 
Instead, it was set to a value of " + "{!r}".format(self.month)) + + def __apply__(self, other): + # months_since: find the calendar quarter containing other.month, + # e.g. if other.month == 8, the calendar quarter is [Jul, Aug, Sep]. + # Then find the month in that quarter containing an onOffset date for + # self. `months_since` is the number of months to shift other.month + # to get to this on-offset month. + months_since = other.month % 3 - self.month % 3 + qtrs = roll_qtrday(other, self.n, self.month, + day_option=self._day_option, modby=3) + months = qtrs * 3 - months_since + return _shift_month(other, months, self._day_option) + + def onOffset(self, date): + """Check if the given date is in the set of possible dates created + using a length-one version of this offset class.""" + if self.normalize and not _is_normalized(date): + return False + mod_month = (date.month - self.month) % 3 + return mod_month == 0 and date.day == self._get_offset_day(date) + + def __sub__(self, other): + import cftime + + if isinstance(other, cftime.datetime): + raise TypeError('Cannot subtract cftime.datetime from offset.') + elif type(other) == type(self) and other.month == self.month: + return type(self)(self.n - other.n, month=self.month) + else: + return NotImplemented + + def __mul__(self, other): + return type(self)(n=other * self.n, month=self.month) + + def rule_code(self): + return '{}-{}'.format(self._freq, _MONTH_ABBREVIATIONS[self.month]) + + def __str__(self): + return '<{}: n={}, month={}>'.format( + type(self).__name__, self.n, self.month) + + +class QuarterBegin(QuarterOffset): + """Default month for QuarterBegin is January + DateOffset increments between Quarter start dates. + + month = 1 corresponds to dates like 1/1/2007, 4/1/2007, ... + month = 2 corresponds to dates like 2/1/2007, 5/1/2007, ... + month = 3 corresponds to dates like 3/1/2007, 6/1/2007, ... 
+ """ + # In pandas, _from_name_startingMonth = 1 used when freq='QS' + _default_month = 1 + _freq = 'QS' + _day_option = 'start' + + def rollforward(self, date): + """Roll date forward to nearest start of quarter""" + if self.onOffset(date): + return date + else: + return date + QuarterBegin(month=self.month) + + def rollback(self, date): + """Roll date backward to nearest start of quarter""" + if self.onOffset(date): + return date + else: + return date - QuarterBegin(month=self.month) + + +class QuarterEnd(QuarterOffset): + """Default month for QuarterEnd is December + """ + # In pandas, QuarterOffset._from_name suffix == 'DEC' + # See _lite_rule_alias in pandas._libs.tslibs.frequencies + _default_month = 12 + _freq = 'Q' + _day_option = 'end' + + def rollforward(self, date): + """Roll date forward to nearest end of quarter""" + if self.onOffset(date): + return date + else: + return date + QuarterEnd(month=self.month) + + def rollback(self, date): + """Roll date backward to nearest end of quarter""" + if self.onOffset(date): + return date + else: + return date - QuarterEnd(month=self.month) + + class YearOffset(BaseCFTimeOffset): _freq = None # type: ClassVar[str] _day_option = None # type: ClassVar[str] @@ -282,7 +486,7 @@ def __apply__(self, other): raise ValueError(self._day_option) years = _adjust_n_years(other, self.n, self.month, reference_day) months = years * 12 + (self.month - other.month) - return _shift_months(other, months, self._day_option) + return _shift_month(other, months, self._day_option) def __sub__(self, other): import cftime @@ -358,29 +562,41 @@ def rollback(self, date): class Day(BaseCFTimeOffset): _freq = 'D' + def as_timedelta(self): + return timedelta(days=self.n) + def __apply__(self, other): - return other + timedelta(days=self.n) + return other + self.as_timedelta() class Hour(BaseCFTimeOffset): _freq = 'H' + def as_timedelta(self): + return timedelta(hours=self.n) + def __apply__(self, other): - return other + 
timedelta(hours=self.n) + return other + self.as_timedelta() class Minute(BaseCFTimeOffset): _freq = 'T' + def as_timedelta(self): + return timedelta(minutes=self.n) + def __apply__(self, other): - return other + timedelta(minutes=self.n) + return other + self.as_timedelta() class Second(BaseCFTimeOffset): _freq = 'S' + def as_timedelta(self): + return timedelta(seconds=self.n) + def __apply__(self, other): - return other + timedelta(seconds=self.n) + return other + self.as_timedelta() _FREQUENCIES = { @@ -388,6 +604,8 @@ def __apply__(self, other): 'AS': YearBegin, 'Y': YearEnd, 'YS': YearBegin, + 'Q': QuarterEnd, + 'QS': QuarterBegin, 'M': MonthEnd, 'MS': MonthBegin, 'D': Day, @@ -418,7 +636,31 @@ def __apply__(self, other): 'A-SEP': partial(YearEnd, month=9), 'A-OCT': partial(YearEnd, month=10), 'A-NOV': partial(YearEnd, month=11), - 'A-DEC': partial(YearEnd, month=12) + 'A-DEC': partial(YearEnd, month=12), + 'QS-JAN': partial(QuarterBegin, month=1), + 'QS-FEB': partial(QuarterBegin, month=2), + 'QS-MAR': partial(QuarterBegin, month=3), + 'QS-APR': partial(QuarterBegin, month=4), + 'QS-MAY': partial(QuarterBegin, month=5), + 'QS-JUN': partial(QuarterBegin, month=6), + 'QS-JUL': partial(QuarterBegin, month=7), + 'QS-AUG': partial(QuarterBegin, month=8), + 'QS-SEP': partial(QuarterBegin, month=9), + 'QS-OCT': partial(QuarterBegin, month=10), + 'QS-NOV': partial(QuarterBegin, month=11), + 'QS-DEC': partial(QuarterBegin, month=12), + 'Q-JAN': partial(QuarterEnd, month=1), + 'Q-FEB': partial(QuarterEnd, month=2), + 'Q-MAR': partial(QuarterEnd, month=3), + 'Q-APR': partial(QuarterEnd, month=4), + 'Q-MAY': partial(QuarterEnd, month=5), + 'Q-JUN': partial(QuarterEnd, month=6), + 'Q-JUL': partial(QuarterEnd, month=7), + 'Q-AUG': partial(QuarterEnd, month=8), + 'Q-SEP': partial(QuarterEnd, month=9), + 'Q-OCT': partial(QuarterEnd, month=10), + 'Q-NOV': partial(QuarterEnd, month=11), + 'Q-DEC': partial(QuarterEnd, month=12) } @@ -427,6 +669,11 @@ def __apply__(self, other): 
_FREQUENCY_CONDITION) +# pandas defines these offsets as "Tick" objects, which for instance have +# distinct behavior from monthly or longer frequencies in resample. +CFTIME_TICKS = (Day, Hour, Minute, Second) + + def to_offset(freq): """Convert a frequency string to the appropriate subclass of BaseCFTimeOffset.""" @@ -607,55 +854,84 @@ def cftime_range(start=None, end=None, periods=None, freq='D', Valid simple frequency strings for use with ``cftime``-calendars include any multiples of the following. - +--------+-----------------------+ - | Alias | Description | - +========+=======================+ - | A, Y | Year-end frequency | - +--------+-----------------------+ - | AS, YS | Year-start frequency | - +--------+-----------------------+ - | M | Month-end frequency | - +--------+-----------------------+ - | MS | Month-start frequency | - +--------+-----------------------+ - | D | Day frequency | - +--------+-----------------------+ - | H | Hour frequency | - +--------+-----------------------+ - | T, min | Minute frequency | - +--------+-----------------------+ - | S | Second frequency | - +--------+-----------------------+ + +--------+--------------------------+ + | Alias | Description | + +========+==========================+ + | A, Y | Year-end frequency | + +--------+--------------------------+ + | AS, YS | Year-start frequency | + +--------+--------------------------+ + | Q | Quarter-end frequency | + +--------+--------------------------+ + | QS | Quarter-start frequency | + +--------+--------------------------+ + | M | Month-end frequency | + +--------+--------------------------+ + | MS | Month-start frequency | + +--------+--------------------------+ + | D | Day frequency | + +--------+--------------------------+ + | H | Hour frequency | + +--------+--------------------------+ + | T, min | Minute frequency | + +--------+--------------------------+ + | S | Second frequency | + +--------+--------------------------+ Any multiples of the following anchored 
offsets are also supported. - +----------+-------------------------------------------------------------------+ - | Alias | Description | - +==========+===================================================================+ - | A(S)-JAN | Annual frequency, anchored at the end (or beginning) of January | - +----------+-------------------------------------------------------------------+ - | A(S)-FEB | Annual frequency, anchored at the end (or beginning) of February | - +----------+-------------------------------------------------------------------+ - | A(S)-MAR | Annual frequency, anchored at the end (or beginning) of March | - +----------+-------------------------------------------------------------------+ - | A(S)-APR | Annual frequency, anchored at the end (or beginning) of April | - +----------+-------------------------------------------------------------------+ - | A(S)-MAY | Annual frequency, anchored at the end (or beginning) of May | - +----------+-------------------------------------------------------------------+ - | A(S)-JUN | Annual frequency, anchored at the end (or beginning) of June | - +----------+-------------------------------------------------------------------+ - | A(S)-JUL | Annual frequency, anchored at the end (or beginning) of July | - +----------+-------------------------------------------------------------------+ - | A(S)-AUG | Annual frequency, anchored at the end (or beginning) of August | - +----------+-------------------------------------------------------------------+ - | A(S)-SEP | Annual frequency, anchored at the end (or beginning) of September | - +----------+-------------------------------------------------------------------+ - | A(S)-OCT | Annual frequency, anchored at the end (or beginning) of October | - +----------+-------------------------------------------------------------------+ - | A(S)-NOV | Annual frequency, anchored at the end (or beginning) of November | - 
+----------+-------------------------------------------------------------------+ - | A(S)-DEC | Annual frequency, anchored at the end (or beginning) of December | - +----------+-------------------------------------------------------------------+ + +----------+--------------------------------------------------------------------+ + | Alias | Description | + +==========+====================================================================+ + | A(S)-JAN | Annual frequency, anchored at the end (or beginning) of January | + +----------+--------------------------------------------------------------------+ + | A(S)-FEB | Annual frequency, anchored at the end (or beginning) of February | + +----------+--------------------------------------------------------------------+ + | A(S)-MAR | Annual frequency, anchored at the end (or beginning) of March | + +----------+--------------------------------------------------------------------+ + | A(S)-APR | Annual frequency, anchored at the end (or beginning) of April | + +----------+--------------------------------------------------------------------+ + | A(S)-MAY | Annual frequency, anchored at the end (or beginning) of May | + +----------+--------------------------------------------------------------------+ + | A(S)-JUN | Annual frequency, anchored at the end (or beginning) of June | + +----------+--------------------------------------------------------------------+ + | A(S)-JUL | Annual frequency, anchored at the end (or beginning) of July | + +----------+--------------------------------------------------------------------+ + | A(S)-AUG | Annual frequency, anchored at the end (or beginning) of August | + +----------+--------------------------------------------------------------------+ + | A(S)-SEP | Annual frequency, anchored at the end (or beginning) of September | + +----------+--------------------------------------------------------------------+ + | A(S)-OCT | Annual frequency, anchored at the end (or beginning) of October | + 
+----------+--------------------------------------------------------------------+ + | A(S)-NOV | Annual frequency, anchored at the end (or beginning) of November | + +----------+--------------------------------------------------------------------+ + | A(S)-DEC | Annual frequency, anchored at the end (or beginning) of December | + +----------+--------------------------------------------------------------------+ + | Q(S)-JAN | Quarter frequency, anchored at the end (or beginning) of January | + +----------+--------------------------------------------------------------------+ + | Q(S)-FEB | Quarter frequency, anchored at the end (or beginning) of February | + +----------+--------------------------------------------------------------------+ + | Q(S)-MAR | Quarter frequency, anchored at the end (or beginning) of March | + +----------+--------------------------------------------------------------------+ + | Q(S)-APR | Quarter frequency, anchored at the end (or beginning) of April | + +----------+--------------------------------------------------------------------+ + | Q(S)-MAY | Quarter frequency, anchored at the end (or beginning) of May | + +----------+--------------------------------------------------------------------+ + | Q(S)-JUN | Quarter frequency, anchored at the end (or beginning) of June | + +----------+--------------------------------------------------------------------+ + | Q(S)-JUL | Quarter frequency, anchored at the end (or beginning) of July | + +----------+--------------------------------------------------------------------+ + | Q(S)-AUG | Quarter frequency, anchored at the end (or beginning) of August | + +----------+--------------------------------------------------------------------+ + | Q(S)-SEP | Quarter frequency, anchored at the end (or beginning) of September | + +----------+--------------------------------------------------------------------+ + | Q(S)-OCT | Quarter frequency, anchored at the end (or beginning) of October | + 
+----------+--------------------------------------------------------------------+ + | Q(S)-NOV | Quarter frequency, anchored at the end (or beginning) of November | + +----------+--------------------------------------------------------------------+ + | Q(S)-DEC | Quarter frequency, anchored at the end (or beginning) of December | + +----------+--------------------------------------------------------------------+ + Finally, the following calendar aliases are supported. diff --git a/xarray/core/common.py b/xarray/core/common.py index f50b5bfedf4..d9d86f64da0 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -749,23 +749,16 @@ def resample(self, indexer=None, skipna=None, closed=None, label=None, dim_coord = self[dim] if isinstance(self.indexes[dim_name], CFTimeIndex): - raise NotImplementedError( - 'Resample is currently not supported along a dimension ' - 'indexed by a CFTimeIndex. For certain kinds of downsampling ' - 'it may be possible to work around this by converting your ' - 'time index to a DatetimeIndex using ' - 'CFTimeIndex.to_datetimeindex. Use caution when doing this ' - 'however, because switching to a DatetimeIndex from a ' - 'CFTimeIndex with a non-standard calendar entails a change ' - 'in the calendar type, which could lead to subtle and silent ' - 'errors.' 
- ) - + from .resample_cftime import CFTimeGrouper + grouper = CFTimeGrouper(freq, closed, label, base, loffset) + else: + # TODO: to_offset() call required for pandas==0.19.2 + grouper = pd.Grouper(freq=freq, closed=closed, label=label, + base=base, + loffset=pd.tseries.frequencies.to_offset( + loffset)) group = DataArray(dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM) - # TODO: to_offset() call required for pandas==0.19.2 - grouper = pd.Grouper(freq=freq, closed=closed, label=label, base=base, - loffset=pd.tseries.frequencies.to_offset(loffset)) resampler = self._resample_cls(self, group=group, dim=dim_name, grouper=grouper, resample_dim=RESAMPLE_DIM) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index e4577c3d593..63041b887ed 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -259,11 +259,23 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None, # TODO: sort instead of raising an error raise ValueError('index must be monotonic for resampling') s = pd.Series(np.arange(index.size), index) - first_items = s.groupby(grouper).first() - _apply_loffset(grouper, first_items) + from .resample_cftime import CFTimeGrouper + if isinstance(grouper, CFTimeGrouper): + first_items = grouper.first_items(index) + else: + first_items = s.groupby(grouper).first() + _apply_loffset(grouper, first_items) full_index = first_items.index if first_items.isnull().any(): - first_items = first_items.dropna() + if isinstance(grouper, CFTimeGrouper): + index_dict = dict(zip(np.arange(first_items.size), + first_items.index.values)) + first_items.index = np.arange(first_items.size) + first_items = first_items.dropna() + first_items.index = [index_dict[i] for i in + first_items.index.values] + else: + first_items = first_items.dropna() sbins = first_items.values.astype(np.int64) group_indices = ([slice(i, j) for i, j in zip(sbins[:-1], sbins[1:])] + diff --git a/xarray/core/options.py b/xarray/core/options.py index 
c9d26c3e577..b8e1920a6ce 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -64,9 +64,8 @@ def _get_keep_attrs(default): elif global_choice in [True, False]: return global_choice else: - raise ValueError( - "The global option keep_attrs must be one of" - " True, False or 'default'.") + raise ValueError("The global option keep_attrs must be one of" + " True, False or 'default'.") class set_options(object): diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py new file mode 100644 index 00000000000..3f33fbcace7 --- /dev/null +++ b/xarray/core/resample_cftime.py @@ -0,0 +1,365 @@ +"""Resampling for CFTimeIndex. Does not support non-integer freq.""" +# The mechanisms for resampling CFTimeIndex was copied and adapted from +# the source code defined in pandas.core.resample +# +# For reference, here is a copy of the pandas copyright notice: +# +# BSD 3-Clause License +# +# Copyright (c) 2008-2012, AQR Capital Management, LLC, Lambda Foundry, Inc. +# and PyData Development Team +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +from __future__ import absolute_import, division, print_function + +from ..coding.cftimeindex import CFTimeIndex +from ..coding.cftime_offsets import (cftime_range, normalize_date, + Day, MonthEnd, QuarterEnd, YearEnd, + CFTIME_TICKS, to_offset) +import datetime +import numpy as np +import pandas as pd + + +class CFTimeGrouper(object): + """This is a simple container for the grouping parameters that implements a + single method, the only one required for resampling in xarray. It cannot + be used in a call to groupby like a pandas.Grouper object can.""" + + def __init__(self, freq, closed=None, label=None, base=0, loffset=None): + self.freq = to_offset(freq) + self.closed = closed + self.label = label + self.base = base + self.loffset = loffset + + if isinstance(self.freq, (MonthEnd, QuarterEnd, YearEnd)): + if self.closed is None: + self.closed = 'right' + if self.label is None: + self.label = 'right' + else: + if self.closed is None: + self.closed = 'left' + if self.label is None: + self.label = 'left' + + def first_items(self, index): + """Meant to reproduce the results of the following + + grouper = pandas.Grouper(...) 
+ first_items = pd.Series(np.arange(len(index)), + index).groupby(grouper).first() + + with index being a CFTimeIndex instead of a DatetimeIndex. + """ + + datetime_bins, labels = _get_time_bins(index, self.freq, self.closed, + self.label, self.base) + if self.loffset is not None: + if isinstance(self.loffset, datetime.timedelta): + labels = labels + self.loffset + else: + labels = labels + to_offset(self.loffset) + + # check binner fits data + if index[0] < datetime_bins[0]: + raise ValueError("Value falls before first bin") + if index[-1] > datetime_bins[-1]: + raise ValueError("Value falls after last bin") + + integer_bins = np.searchsorted( + index, datetime_bins, side=self.closed)[:-1] + first_items = pd.Series(integer_bins, labels) + + # Mask duplicate values with NaNs, preserving the last values + non_duplicate = ~first_items.duplicated('last') + return first_items.where(non_duplicate) + + +def _get_time_bins(index, freq, closed, label, base): + """Obtain the bins and their respective labels for resampling operations. + + Parameters + ---------- + index : CFTimeIndex + Index object to be resampled (e.g., CFTimeIndex named 'time'). + freq : xarray.coding.cftime_offsets.BaseCFTimeOffset + The offset object representing target conversion a.k.a. resampling + frequency (e.g., 'MS', '2D', 'H', or '3T' with + coding.cftime_offsets.to_offset() applied to it). + closed : 'left' or 'right', optional + Which side of bin interval is closed. + The default is 'left' for all frequency offsets except for 'M', 'Q', + and 'A', which have a default of 'right'. + label : 'left' or 'right', optional + Which bin edge label to label bucket with. + The default is 'left' for all frequency offsets except for 'M', 'Q', + and 'A', which have a default of 'right'. + base : int, optional + For frequencies that evenly subdivide 1 day, the "origin" of the + aggregated intervals. For example, for '5min' frequency, base could + range from 0 through 4. Defaults to 0. 
+ + Returns + ------- + datetime_bins : CFTimeIndex + Defines the edge of resampling bins by which original index values will + be grouped into. + labels : CFTimeIndex + Define what the user actually sees the bins labeled as. + """ + + if not isinstance(index, CFTimeIndex): + raise TypeError('index must be a CFTimeIndex, but got ' + 'an instance of %r' % type(index).__name__) + if len(index) == 0: + datetime_bins = labels = CFTimeIndex(data=[], name=index.name) + return datetime_bins, labels + + first, last = _get_range_edges(index.min(), index.max(), freq, + closed=closed, + base=base) + datetime_bins = labels = cftime_range(freq=freq, + start=first, + end=last, + name=index.name) + + datetime_bins, labels = _adjust_bin_edges(datetime_bins, freq, closed, + index, labels) + + if label == 'right': + labels = labels[1:] + else: + labels = labels[:-1] + + # TODO: when CFTimeIndex supports missing values, if the reference index + # contains missing values, insert the appropriate NaN value at the + # beginning of the datetime_bins and labels indexes. + + return datetime_bins, labels + + +def _adjust_bin_edges(datetime_bins, offset, closed, index, labels): + """This is required for determining the bin edges resampling with + daily frequencies greater than one day, month end, and year end + frequencies. + + Consider the following example. Let's say you want to downsample the + time series with the following coordinates to month end frequency: + + CFTimeIndex([2000-01-01 12:00:00, 2000-01-31 12:00:00, + 2000-02-01 12:00:00], dtype='object') + + Without this adjustment, _get_time_bins with month-end frequency will + return the following index for the bin edges (default closed='right' and + label='right' in this case): + + CFTimeIndex([1999-12-31 00:00:00, 2000-01-31 00:00:00, + 2000-02-29 00:00:00], dtype='object') + + If 2000-01-31 is used as a bound for a bin, the value on + 2000-01-31T12:00:00 (at noon on January 31st), will not be included in the + month of January. 
To account for this, pandas adds a day minus one worth + of microseconds to the bin edges generated by cftime range, so that we do + bin the value at noon on January 31st in the January bin. This results in + an index with bin edges like the following: + + CFTimeIndex([1999-12-31 23:59:59, 2000-01-31 23:59:59, + 2000-02-29 23:59:59], dtype='object') + + The labels are still: + + CFTimeIndex([2000-01-31 00:00:00, 2000-02-29 00:00:00], dtype='object') + + This is also required for daily frequencies longer than one day and + year-end frequencies. + """ + is_super_daily = (isinstance(offset, (MonthEnd, QuarterEnd, YearEnd)) or + (isinstance(offset, Day) and offset.n > 1)) + if is_super_daily: + if closed == 'right': + datetime_bins = datetime_bins + datetime.timedelta(days=1, + microseconds=-1) + if datetime_bins[-2] > index.max(): + datetime_bins = datetime_bins[:-1] + labels = labels[:-1] + + return datetime_bins, labels + + +def _get_range_edges(first, last, offset, closed='left', base=0): + """ Get the correct starting and ending datetimes for the resampled + CFTimeIndex range. + + Parameters + ---------- + first : cftime.datetime + Uncorrected starting datetime object for resampled CFTimeIndex range. + Usually the min of the original CFTimeIndex. + last : cftime.datetime + Uncorrected ending datetime object for resampled CFTimeIndex range. + Usually the max of the original CFTimeIndex. + offset : xarray.coding.cftime_offsets.BaseCFTimeOffset + The offset object representing target conversion a.k.a. resampling + frequency. Contains information on offset type (e.g. Day or 'D') and + offset magnitude (e.g., n = 3). + closed : 'left' or 'right', optional + Which side of bin interval is closed. Defaults to 'left'. + base : int, optional + For frequencies that evenly subdivide 1 day, the "origin" of the + aggregated intervals. For example, for '5min' frequency, base could + range from 0 through 4. Defaults to 0. 
+ + Returns + ------- + first : cftime.datetime + Corrected starting datetime object for resampled CFTimeIndex range. + last : cftime.datetime + Corrected ending datetime object for resampled CFTimeIndex range. + """ + if isinstance(offset, CFTIME_TICKS): + first, last = _adjust_dates_anchored(first, last, offset, + closed=closed, base=base) + return first, last + else: + first = normalize_date(first) + last = normalize_date(last) + + if closed == 'left': + first = offset.rollback(first) + else: + first = first - offset + + last = last + offset + return first, last + + +def _adjust_dates_anchored(first, last, offset, closed='right', base=0): + """ First and last offsets should be calculated from the start day to fix + an error cause by resampling across multiple days when a one day period is + not a multiple of the frequency. + See https://github.com/pandas-dev/pandas/issues/8683 + + Parameters + ---------- + first : cftime.datetime + A datetime object representing the start of a CFTimeIndex range. + last : cftime.datetime + A datetime object representing the end of a CFTimeIndex range. + offset : xarray.coding.cftime_offsets.BaseCFTimeOffset + The offset object representing target conversion a.k.a. resampling + frequency. Contains information on offset type (e.g. Day or 'D') and + offset magnitude (e.g., n = 3). + closed : 'left' or 'right', optional + Which side of bin interval is closed. Defaults to 'right'. + base : int, optional + For frequencies that evenly subdivide 1 day, the "origin" of the + aggregated intervals. For example, for '5min' frequency, base could + range from 0 through 4. Defaults to 0. + + Returns + ------- + fresult : cftime.datetime + A datetime object representing the start of a date range that has been + adjusted to fix resampling errors. + lresult : cftime.datetime + A datetime object representing the end of a date range that has been + adjusted to fix resampling errors. 
+ """ + + base = base % offset.n + start_day = normalize_date(first) + base_td = type(offset)(n=base).as_timedelta() + start_day += base_td + foffset = exact_cftime_datetime_difference( + start_day, first) % offset.as_timedelta() + loffset = exact_cftime_datetime_difference( + start_day, last) % offset.as_timedelta() + if closed == 'right': + if foffset.total_seconds() > 0: + fresult = first - foffset + else: + fresult = first - offset.as_timedelta() + + if loffset.total_seconds() > 0: + lresult = last + (offset.as_timedelta() - loffset) + else: + lresult = last + else: + if foffset.total_seconds() > 0: + fresult = first - foffset + else: + fresult = first + + if loffset.total_seconds() > 0: + lresult = last + (offset.as_timedelta() - loffset) + else: + lresult = last + offset.as_timedelta() + return fresult, lresult + + +def exact_cftime_datetime_difference(a, b): + """Exact computation of b - a + + Assumes: + + a = a_0 + a_m + b = b_0 + b_m + + Here a_0, and b_0 represent the input dates rounded + down to the nearest second, and a_m, and b_m represent + the remaining microseconds associated with date a and + date b. + + We can then express the value of b - a as: + + b - a = (b_0 + b_m) - (a_0 + a_m) = b_0 - a_0 + b_m - a_m + + By construction, we know that b_0 - a_0 must be a round number + of seconds. Therefore we can take the result of b_0 - a_0 using + ordinary cftime.datetime arithmetic and round to the nearest + second. b_m - a_m is the remainder, in microseconds, and we + can simply add this to the rounded timedelta. 
+ + Parameters + ---------- + a : cftime.datetime + Input datetime + b : cftime.datetime + Input datetime + + Returns + ------- + datetime.timedelta + """ + seconds = b.replace(microsecond=0) - a.replace(microsecond=0) + seconds = int(round(seconds.total_seconds())) + microseconds = b.microsecond - a.microsecond + return datetime.timedelta(seconds=seconds, microseconds=microseconds) diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index b9d2cf520a8..187209b00ff 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -6,9 +6,9 @@ from xarray import CFTimeIndex from xarray.coding.cftime_offsets import ( - _MONTH_ABBREVIATIONS, BaseCFTimeOffset, Day, Hour, Minute, MonthBegin, - MonthEnd, Second, YearBegin, YearEnd, _days_in_month, cftime_range, - get_date_type, to_cftime_datetime, to_offset) + _MONTH_ABBREVIATIONS, BaseCFTimeOffset, Day, Hour, Minute, Second, + MonthBegin, MonthEnd, YearBegin, YearEnd, QuarterBegin, QuarterEnd, + _days_in_month, cftime_range, get_date_type, to_cftime_datetime, to_offset) cftime = pytest.importorskip('cftime') @@ -32,9 +32,13 @@ def calendar(request): [(BaseCFTimeOffset(), 1), (YearBegin(), 1), (YearEnd(), 1), + (QuarterBegin(), 1), + (QuarterEnd(), 1), (BaseCFTimeOffset(n=2), 2), (YearBegin(n=2), 2), - (YearEnd(n=2), 2)], + (YearEnd(n=2), 2), + (QuarterBegin(n=2), 2), + (QuarterEnd(n=2), 2)], ids=_id_func ) def test_cftime_offset_constructor_valid_n(offset, expected_n): @@ -45,7 +49,9 @@ def test_cftime_offset_constructor_valid_n(offset, expected_n): ('offset', 'invalid_n'), [(BaseCFTimeOffset, 1.5), (YearBegin, 1.5), - (YearEnd, 1.5)], + (YearEnd, 1.5), + (QuarterBegin, 1.5), + (QuarterEnd, 1.5)], ids=_id_func ) def test_cftime_offset_constructor_invalid_n(offset, invalid_n): @@ -58,7 +64,11 @@ def test_cftime_offset_constructor_invalid_n(offset, invalid_n): [(YearBegin(), 1), (YearEnd(), 12), (YearBegin(month=5), 5), - (YearEnd(month=5), 5)], + 
(YearEnd(month=5), 5), + (QuarterBegin(), 1), + (QuarterEnd(), 12), + (QuarterBegin(month=5), 5), + (QuarterEnd(month=5), 5)], ids=_id_func ) def test_year_offset_constructor_valid_month(offset, expected_month): @@ -72,7 +82,13 @@ def test_year_offset_constructor_valid_month(offset, expected_month): (YearBegin, 13, ValueError,), (YearEnd, 13, ValueError), (YearBegin, 1.5, TypeError), - (YearEnd, 1.5, TypeError)], + (YearEnd, 1.5, TypeError), + (QuarterBegin, 0, ValueError), + (QuarterEnd, 0, ValueError), + (QuarterBegin, 1.5, TypeError), + (QuarterEnd, 1.5, TypeError), + (QuarterBegin, 13, ValueError), + (QuarterEnd, 13, ValueError)], ids=_id_func ) def test_year_offset_constructor_invalid_month( @@ -85,7 +101,8 @@ def test_year_offset_constructor_invalid_month( ('offset', 'expected'), [(BaseCFTimeOffset(), None), (MonthBegin(), 'MS'), - (YearBegin(), 'AS-JAN')], + (YearBegin(), 'AS-JAN'), + (QuarterBegin(), 'QS-JAN')], ids=_id_func ) def test_rule_code(offset, expected): @@ -95,7 +112,8 @@ def test_rule_code(offset, expected): @pytest.mark.parametrize( ('offset', 'expected'), [(BaseCFTimeOffset(), ''), - (YearBegin(), '')], + (YearBegin(), ''), + (QuarterBegin(), '')], ids=_id_func ) def test_str_and_repr(offset, expected): @@ -105,7 +123,7 @@ def test_str_and_repr(offset, expected): @pytest.mark.parametrize( 'offset', - [BaseCFTimeOffset(), MonthBegin(), YearBegin()], + [BaseCFTimeOffset(), MonthBegin(), QuarterBegin(), YearBegin()], ids=_id_func ) def test_to_offset_offset_input(offset): @@ -164,7 +182,38 @@ def test_to_offset_annual(month_label, month_int, multiple, offset_str): assert result == expected -@pytest.mark.parametrize('freq', ['Z', '7min2', 'AM', 'M-', 'AS-', '1H1min']) +_QUARTER_OFFSET_TYPES = { + 'Q': QuarterEnd, + 'QS': QuarterBegin +} + + +@pytest.mark.parametrize(('month_int', 'month_label'), + list(_MONTH_ABBREVIATIONS.items()) + [(0, '')]) +@pytest.mark.parametrize('multiple', [None, 2]) +@pytest.mark.parametrize('offset_str', ['QS', 'Q']) 
+def test_to_offset_quarter(month_label, month_int, multiple, offset_str): + freq = offset_str + offset_type = _QUARTER_OFFSET_TYPES[offset_str] + if month_label: + freq = '-'.join([freq, month_label]) + if multiple: + freq = '{}'.format(multiple) + freq + result = to_offset(freq) + + if multiple and month_int: + expected = offset_type(n=multiple, month=month_int) + elif multiple: + expected = offset_type(n=multiple) + elif month_int: + expected = offset_type(month=month_int) + else: + expected = offset_type() + assert result == expected + + +@pytest.mark.parametrize('freq', ['Z', '7min2', 'AM', 'M-', 'AS-', 'QS-', + '1H1min']) def test_invalid_to_offset_str(freq): with pytest.raises(ValueError): to_offset(freq) @@ -197,13 +246,16 @@ def test_to_cftime_datetime_error_type_error(): _EQ_TESTS_A = [ BaseCFTimeOffset(), YearBegin(), YearEnd(), YearBegin(month=2), - YearEnd(month=2), MonthBegin(), MonthEnd(), Day(), Hour(), Minute(), + YearEnd(month=2), QuarterBegin(), QuarterEnd(), QuarterBegin(month=2), + QuarterEnd(month=2), MonthBegin(), MonthEnd(), Day(), Hour(), Minute(), Second() ] _EQ_TESTS_B = [ BaseCFTimeOffset(n=2), YearBegin(n=2), YearEnd(n=2), - YearBegin(n=2, month=2), YearEnd(n=2, month=2), MonthBegin(n=2), - MonthEnd(n=2), Day(n=2), Hour(n=2), Minute(n=2), Second(n=2) + YearBegin(n=2, month=2), YearEnd(n=2, month=2), QuarterBegin(n=2), + QuarterEnd(n=2), QuarterBegin(n=2, month=2), QuarterEnd(n=2, month=2), + MonthBegin(n=2), MonthEnd(n=2), Day(n=2), Hour(n=2), Minute(n=2), + Second(n=2) ] @@ -216,8 +268,10 @@ def test_neq(a, b): _EQ_TESTS_B_COPY = [ BaseCFTimeOffset(n=2), YearBegin(n=2), YearEnd(n=2), - YearBegin(n=2, month=2), YearEnd(n=2, month=2), MonthBegin(n=2), - MonthEnd(n=2), Day(n=2), Hour(n=2), Minute(n=2), Second(n=2) + YearBegin(n=2, month=2), YearEnd(n=2, month=2), QuarterBegin(n=2), + QuarterEnd(n=2), QuarterBegin(n=2, month=2), QuarterEnd(n=2, month=2), + MonthBegin(n=2), MonthEnd(n=2), Day(n=2), Hour(n=2), Minute(n=2), + Second(n=2) ] @@ 
-232,6 +286,8 @@ def test_eq(a, b): (BaseCFTimeOffset(), BaseCFTimeOffset(n=3)), (YearEnd(), YearEnd(n=3)), (YearBegin(), YearBegin(n=3)), + (QuarterEnd(), QuarterEnd(n=3)), + (QuarterBegin(), QuarterBegin(n=3)), (MonthEnd(), MonthEnd(n=3)), (MonthBegin(), MonthBegin(n=3)), (Day(), Day(n=3)), @@ -256,6 +312,8 @@ def test_rmul(offset, expected): [(BaseCFTimeOffset(), BaseCFTimeOffset(n=-1)), (YearEnd(), YearEnd(n=-1)), (YearBegin(), YearBegin(n=-1)), + (QuarterEnd(), QuarterEnd(n=-1)), + (QuarterBegin(), QuarterBegin(n=-1)), (MonthEnd(), MonthEnd(n=-1)), (MonthBegin(), MonthBegin(n=-1)), (Day(), Day(n=-1)), @@ -536,6 +594,89 @@ def test_add_year_end_onOffset( assert result == expected +@pytest.mark.parametrize( + ('initial_date_args', 'offset', 'expected_date_args'), + [((1, 1, 1), QuarterBegin(), (1, 4, 1)), + ((1, 1, 1), QuarterBegin(n=2), (1, 7, 1)), + ((1, 1, 1), QuarterBegin(month=2), (1, 2, 1)), + ((1, 1, 7), QuarterBegin(n=2), (1, 7, 1)), + ((2, 2, 1), QuarterBegin(n=-1), (2, 1, 1)), + ((1, 1, 2), QuarterBegin(n=-1), (1, 1, 1)), + ((1, 1, 1, 5, 5, 5, 5), QuarterBegin(), (1, 4, 1, 5, 5, 5, 5)), + ((2, 1, 1, 5, 5, 5, 5), QuarterBegin(n=-1), (1, 10, 1, 5, 5, 5, 5))], + ids=_id_func +) +def test_add_quarter_begin(calendar, initial_date_args, offset, + expected_date_args): + date_type = get_date_type(calendar) + initial = date_type(*initial_date_args) + result = initial + offset + expected = date_type(*expected_date_args) + assert result == expected + + +@pytest.mark.parametrize( + ('initial_date_args', 'offset', 'expected_year_month', + 'expected_sub_day'), + [((1, 1, 1), QuarterEnd(), (1, 3), ()), + ((1, 1, 1), QuarterEnd(n=2), (1, 6), ()), + ((1, 1, 1), QuarterEnd(month=1), (1, 1), ()), + ((2, 3, 1), QuarterEnd(n=-1), (1, 12), ()), + ((1, 3, 1), QuarterEnd(n=-1, month=2), (1, 2), ()), + ((1, 1, 1, 5, 5, 5, 5), QuarterEnd(), (1, 3), (5, 5, 5, 5)), + ((1, 1, 1, 5, 5, 5, 5), QuarterEnd(n=2), (1, 6), (5, 5, 5, 5))], + ids=_id_func +) +def test_add_quarter_end( + 
calendar, initial_date_args, offset, expected_year_month, + expected_sub_day +): + date_type = get_date_type(calendar) + initial = date_type(*initial_date_args) + result = initial + offset + reference_args = expected_year_month + (1,) + reference = date_type(*reference_args) + + # Here the days at the end of each month varies based on the calendar used + expected_date_args = (expected_year_month + + (_days_in_month(reference),) + expected_sub_day) + expected = date_type(*expected_date_args) + assert result == expected + + +@pytest.mark.parametrize( + ('initial_year_month', 'initial_sub_day', 'offset', 'expected_year_month', + 'expected_sub_day'), + [((1, 12), (), QuarterEnd(), (2, 3), ()), + ((1, 12), (), QuarterEnd(n=2), (2, 6), ()), + ((1, 12), (), QuarterEnd(n=-1), (1, 9), ()), + ((1, 12), (), QuarterEnd(n=-2), (1, 6), ()), + ((1, 1), (), QuarterEnd(month=2), (1, 2), ()), + ((1, 12), (5, 5, 5, 5), QuarterEnd(), (2, 3), (5, 5, 5, 5)), + ((1, 12), (5, 5, 5, 5), QuarterEnd(n=-1), (1, 9), (5, 5, 5, 5))], + ids=_id_func +) +def test_add_quarter_end_onOffset( + calendar, initial_year_month, initial_sub_day, offset, expected_year_month, + expected_sub_day +): + date_type = get_date_type(calendar) + reference_args = initial_year_month + (1,) + reference = date_type(*reference_args) + initial_date_args = (initial_year_month + (_days_in_month(reference),) + + initial_sub_day) + initial = date_type(*initial_date_args) + result = initial + offset + reference_args = expected_year_month + (1,) + reference = date_type(*reference_args) + + # Here the days at the end of each month varies based on the calendar used + expected_date_args = (expected_year_month + + (_days_in_month(reference),) + expected_sub_day) + expected = date_type(*expected_date_args) + assert result == expected + + # Note for all sub-monthly offsets, pandas always returns True for onOffset @pytest.mark.parametrize( ('date_args', 'offset', 'expected'), @@ -543,6 +684,10 @@ def test_add_year_end_onOffset( ((1, 
1, 1, 1), MonthBegin(), True), ((1, 1, 5), MonthBegin(), False), ((1, 1, 5), MonthEnd(), False), + ((1, 1, 1), QuarterBegin(), True), + ((1, 1, 1, 1), QuarterBegin(), True), + ((1, 1, 5), QuarterBegin(), False), + ((1, 12, 1), QuarterEnd(), False), ((1, 1, 1), YearBegin(), True), ((1, 1, 1, 1), YearBegin(), True), ((1, 1, 5), YearBegin(), False), @@ -562,19 +707,22 @@ def test_onOffset(calendar, date_args, offset, expected): @pytest.mark.parametrize( - ('year_month_args', 'sub_day_args', 'offset'), + ('year_quarter_month_args', 'sub_day_args', 'offset'), [((1, 1), (), MonthEnd()), ((1, 1), (1,), MonthEnd()), + ((1, 12), (), QuarterEnd()), + ((1, 1), (), QuarterEnd(month=1)), ((1, 12), (), YearEnd()), ((1, 1), (), YearEnd(month=1))], ids=_id_func ) -def test_onOffset_month_or_year_end( - calendar, year_month_args, sub_day_args, offset): +def test_onOffset_month_or_quarter_or_year_end( + calendar, year_quarter_month_args, sub_day_args, offset): date_type = get_date_type(calendar) - reference_args = year_month_args + (1,) + reference_args = year_quarter_month_args + (1,) reference = date_type(*reference_args) - date_args = year_month_args + (_days_in_month(reference),) + sub_day_args + date_args = (year_quarter_month_args + (_days_in_month(reference),) + + sub_day_args) date = date_type(*date_args) result = offset.onOffset(date) assert result @@ -590,6 +738,14 @@ def test_onOffset_month_or_year_end( (YearEnd(n=2), (1, 3, 1), (1, 12)), (YearEnd(n=2, month=2), (1, 3, 1), (2, 2)), (YearEnd(n=2, month=4), (1, 4, 30), (1, 4)), + (QuarterBegin(), (1, 3, 2), (1, 4)), + (QuarterBegin(), (1, 4, 1), (1, 4)), + (QuarterBegin(n=2), (1, 4, 1), (1, 4)), + (QuarterBegin(n=2, month=2), (1, 4, 1), (1, 5)), + (QuarterEnd(), (1, 3, 1), (1, 3)), + (QuarterEnd(n=2), (1, 3, 1), (1, 3)), + (QuarterEnd(n=2, month=2), (1, 3, 1), (1, 5)), + (QuarterEnd(n=2, month=4), (1, 4, 30), (1, 4)), (MonthBegin(), (1, 3, 2), (1, 4)), (MonthBegin(), (1, 3, 1), (1, 3)), (MonthBegin(n=2), (1, 3, 2), (1, 4)), 
@@ -606,9 +762,9 @@ def test_rollforward(calendar, offset, initial_date_args, partial_expected_date_args): date_type = get_date_type(calendar) initial = date_type(*initial_date_args) - if isinstance(offset, (MonthBegin, YearBegin)): + if isinstance(offset, (MonthBegin, QuarterBegin, YearBegin)): expected_date_args = partial_expected_date_args + (1,) - elif isinstance(offset, (MonthEnd, YearEnd)): + elif isinstance(offset, (MonthEnd, QuarterEnd, YearEnd)): reference_args = partial_expected_date_args + (1,) reference = date_type(*reference_args) expected_date_args = (partial_expected_date_args + @@ -631,6 +787,14 @@ def test_rollforward(calendar, offset, initial_date_args, (YearEnd(n=2), (2, 3, 1), (1, 12)), (YearEnd(n=2, month=2), (2, 3, 1), (2, 2)), (YearEnd(month=4), (1, 4, 30), (1, 4)), + (QuarterBegin(), (1, 3, 2), (1, 1)), + (QuarterBegin(), (1, 4, 1), (1, 4)), + (QuarterBegin(n=2), (1, 4, 1), (1, 4)), + (QuarterBegin(n=2, month=2), (1, 4, 1), (1, 2)), + (QuarterEnd(), (2, 3, 1), (1, 12)), + (QuarterEnd(n=2), (2, 3, 1), (1, 12)), + (QuarterEnd(n=2, month=2), (2, 3, 1), (2, 2)), + (QuarterEnd(n=2, month=4), (1, 4, 30), (1, 4)), (MonthBegin(), (1, 3, 2), (1, 3)), (MonthBegin(n=2), (1, 3, 2), (1, 3)), (MonthBegin(), (1, 3, 1), (1, 3)), @@ -647,9 +811,9 @@ def test_rollback(calendar, offset, initial_date_args, partial_expected_date_args): date_type = get_date_type(calendar) initial = date_type(*initial_date_args) - if isinstance(offset, (MonthBegin, YearBegin)): + if isinstance(offset, (MonthBegin, QuarterBegin, YearBegin)): expected_date_args = partial_expected_date_args + (1,) - elif isinstance(offset, (MonthEnd, YearEnd)): + elif isinstance(offset, (MonthEnd, QuarterEnd, YearEnd)): reference_args = partial_expected_date_args + (1,) reference = date_type(*reference_args) expected_date_args = (partial_expected_date_args + diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 0d6ba6b47c9..645a16bccc3 100644 --- 
a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -375,12 +375,6 @@ def test_groupby(da): assert_identical(result, expected) -@pytest.mark.skipif(not has_cftime, reason='cftime not installed') -def test_resample_error(da): - with pytest.raises(NotImplementedError, match='to_datetimeindex'): - da.resample(time='Y') - - SEL_STRING_OR_LIST_TESTS = { 'string': '0001', 'string-slice': slice('0001-01-01', '0001-12-30'), # type: ignore diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py new file mode 100644 index 00000000000..85f734ddd64 --- /dev/null +++ b/xarray/tests/test_cftimeindex_resample.py @@ -0,0 +1,133 @@ +from __future__ import absolute_import + +import pytest + +import datetime +import numpy as np +import pandas as pd +import xarray as xr +from xarray.core.resample_cftime import CFTimeGrouper + +pytest.importorskip('cftime') +pytest.importorskip('pandas', minversion='0.23.99') + + +@pytest.fixture( + params=[ + dict(start='2004-01-01T12:07:01', periods=91, freq='3D'), + dict(start='1892-01-03T12:07:01', periods=15, freq='41987T'), + dict(start='2004-01-01T12:07:01', periods=7, freq='3Q-AUG'), + dict(start='1892-01-03T12:07:01', periods=10, freq='3AS-JUN') + ], + ids=['3D', '41987T', '3Q_AUG', '3AS_JUN'] +) +def time_range_kwargs(request): + return request.param + + +@pytest.fixture() +def datetime_index(time_range_kwargs): + return pd.date_range(**time_range_kwargs) + + +@pytest.fixture() +def cftime_index(time_range_kwargs): + return xr.cftime_range(**time_range_kwargs) + + +def da(index): + return xr.DataArray(np.arange(100., 100. 
+ index.size), + coords=[index], dims=['time']) + + +@pytest.mark.parametrize('freq', [ + '700T', '8001T', + '12H', '8001H', + '8D', '8001D', + '2MS', '3MS', + '2QS-AUG', '3QS-SEP', + '3AS-MAR', '4A-MAY']) +@pytest.mark.parametrize('closed', [None, 'right']) +@pytest.mark.parametrize('label', [None, 'right']) +@pytest.mark.parametrize('base', [12, 31]) +def test_resampler(freq, closed, label, base, + datetime_index, cftime_index): + # Fairly extensive testing for standard/proleptic Gregorian calendar + # For any frequencies which are not greater-than-day and anchored + # at the end, the default values for closed and label are 'left'. + loffset = '12H' + try: + da_datetime = da(datetime_index).resample( + time=freq, closed=closed, label=label, base=base, + loffset=loffset).mean() + except ValueError: + with pytest.raises(ValueError): + da(cftime_index).resample( + time=freq, closed=closed, label=label, base=base, + loffset=loffset).mean() + else: + da_cftime = da(cftime_index).resample(time=freq, closed=closed, + label=label, base=base, + loffset=loffset).mean() + da_cftime['time'] = da_cftime.indexes['time'].to_datetimeindex() + xr.testing.assert_identical(da_cftime, da_datetime) + + +@pytest.mark.parametrize('freq', [ + '2M', '3M', + '2Q-JUN', '3Q-JUL', + '3A-FEB', '4A-APR']) +@pytest.mark.parametrize('closed', ['left', None]) +@pytest.mark.parametrize('label', ['left', None]) +@pytest.mark.parametrize('base', [17, 24]) +def test_resampler_end_super_day(freq, closed, label, base, + datetime_index, cftime_index): + # Fairly extensive testing for standard/proleptic Gregorian calendar. + # For greater-than-day frequencies anchored at the end, the default values + # for closed and label are 'right'. 
+ loffset = '12H' + try: + da_datetime = da(datetime_index).resample( + time=freq, closed=closed, label=label, base=base, + loffset=loffset).mean() + except ValueError: + with pytest.raises(ValueError): + da(cftime_index).resample( + time=freq, closed=closed, label=label, base=base, + loffset=loffset).mean() + else: + da_cftime = da(cftime_index).resample(time=freq, closed=closed, + label=label, base=base, + loffset=loffset).mean() + da_cftime['time'] = da_cftime.indexes['time'].to_datetimeindex() + xr.testing.assert_identical(da_cftime, da_datetime) + + +@pytest.mark.parametrize( + ('freq', 'expected'), + [('S', 'left'), ('T', 'left'), ('H', 'left'), ('D', 'left'), + ('M', 'right'), ('MS', 'left'), ('Q', 'right'), ('QS', 'left'), + ('A', 'right'), ('AS', 'left')]) +def test_closed_label_defaults(freq, expected): + assert CFTimeGrouper(freq=freq).closed == expected + assert CFTimeGrouper(freq=freq).label == expected + + +@pytest.mark.parametrize('calendar', ['gregorian', 'noleap', 'all_leap', + '360_day', 'julian']) +def test_calendars(calendar): + # Limited testing for non-standard calendars + freq, closed, label, base = '8001T', None, None, 17 + loffset = datetime.timedelta(hours=12) + xr_index = xr.cftime_range(start='2004-01-01T12:07:01', periods=7, + freq='3D', calendar=calendar) + pd_index = pd.date_range(start='2004-01-01T12:07:01', periods=7, + freq='3D') + da_cftime = da(xr_index).resample( + time=freq, closed=closed, label=label, base=base, loffset=loffset + ).mean() + da_datetime = da(pd_index).resample( + time=freq, closed=closed, label=label, base=base, loffset=loffset + ).mean() + da_cftime['time'] = da_cftime.indexes['time'].to_datetimeindex() + xr.testing.assert_identical(da_cftime, da_datetime) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 59d14d7cdac..42c23bd7ade 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2306,8 +2306,7 @@ def test_resample_cftimeindex(self): 
calendar='noleap') array = DataArray(np.arange(12), [('time', times)]) - with raises_regex(NotImplementedError, 'to_datetimeindex'): - array.resample(time='6H').mean() + array.resample(time='6H').mean() def test_resample_first(self): times = pd.date_range('2000-01-01', freq='6H', periods=10)