Skip to content

Commit 6257f8c

Browse files
committed
Start on implementing and testing NetCDFTimeIndex
1 parent 93d6963 commit 6257f8c

File tree

2 files changed

+481
-0
lines changed

2 files changed

+481
-0
lines changed

xarray/core/netcdftimeindex.py

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
import re
2+
from datetime import timedelta
3+
4+
import numpy as np
5+
import pandas as pd
6+
7+
from pandas.lib import isscalar
8+
9+
10+
def named(name, pattern):
    """Wrap ``pattern`` in a regex named capture group called ``name``.

    Returns the string ``(?P<name>pattern)``.
    """
    return ''.join(['(?P<', name, '>', pattern, ')'])
12+
13+
14+
def optional(x):
    """Make the regex fragment ``x`` optional.

    The fragment is placed in a non-capturing group followed by ``?``,
    i.e. the result is ``(?:x)?``.
    """
    return ''.join(('(?:', x, ')?'))
16+
17+
18+
def trailing_optional(xs):
    """Chain regex fragments so that every fragment after the first is
    optional, but only if all fragments before it are present.

    For ``[a, b, c]`` the result is ``a(?:b(?:c(?:)?)?)?``; an empty
    sequence yields the empty string.
    """
    if not xs:
        return ''
    # Build from the innermost (last) fragment outwards.
    nested = ''
    for piece in reversed(xs):
        nested = piece + optional(nested)
    return nested
22+
23+
24+
def build_pattern(date_sep=r'\-', datetime_sep='T', time_sep=r'\:'):
    """Build an anchored regex for an ISO-8601-style datetime string.

    The pattern has one named group per component (``year``, ``month``,
    ``day``, ``hour``, ``minute``, ``second``).  Only the year is
    mandatory; each later component is optional but can only appear if
    all earlier components are present.

    Parameters
    ----------
    date_sep : str
        Regex fragment separating year/month/day.
    datetime_sep : str
        Regex fragment separating the date part from the time part.
    time_sep : str
        Regex fragment separating hour/minute/second.

    Returns
    -------
    str
        The regex, anchored with ``^`` and ``$``.
    """
    # Raw strings keep the regex escapes intact without relying on
    # Python passing unknown escape sequences (e.g. '\d') through.
    pieces = [(None, 'year', r'\d{4}'),
              (date_sep, 'month', r'\d{2}'),
              (date_sep, 'day', r'\d{2}'),
              (datetime_sep, 'hour', r'\d{2}'),
              (time_sep, 'minute', r'\d{2}'),
              (time_sep, 'second', r'\d{2}' + optional(r'\.\d+'))]
    pattern_list = [(sep or '') + named(name, sub_pattern)
                    for sep, name, sub_pattern in pieces]
    # TODO: allow timezone offsets?
    return '^' + trailing_optional(pattern_list) + '$'
36+
37+
38+
def parse_iso8601(datetime_string):
    """Parse an ISO-8601-style string into its named components.

    The string is tried against the basic form (no date or time
    separators) first and then the extended form (``-`` and ``:``
    separators).

    Returns
    -------
    dict
        The ``groupdict()`` of the first matching pattern; components
        absent from the string map to ``None``.

    Raises
    ------
    ValueError
        If neither form matches.
    """
    candidates = (build_pattern(date_sep='', time_sep=''),  # basic
                  build_pattern())                          # extended
    for candidate in candidates:
        found = re.match(candidate, datetime_string)
        if found is not None:
            return found.groupdict()
    raise ValueError('no ISO-8601 match for string: %s' % datetime_string)
47+
48+
49+
def _parse_iso8601_with_reso(date_type, timestr):
    """Parse ``timestr`` into a ``date_type`` instance plus its resolution.

    Parameters
    ----------
    date_type : callable
        Date class; called as ``date_type(1, 1, 1)`` to build a default
        date whose fields are then overridden through ``.replace``.
    timestr : str
        String understood by ``parse_iso8601``.

    Returns
    -------
    tuple
        ``(date, resolution)`` where ``resolution`` is the name of the
        finest component present in ``timestr`` (``'year'`` through
        ``'second'``).
    """
    default = date_type(1, 1, 1)
    result = parse_iso8601(timestr)
    replace = {}

    for attr in ['year', 'month', 'day', 'hour', 'minute', 'second']:
        value = result.get(attr, None)
        if value is None:
            continue
        if attr == 'second' and '.' in value:
            # The pattern admits fractional seconds ('05.25'), which
            # int() cannot parse directly.  Keep the whole seconds and
            # carry the fraction as microseconds (truncated to 6 digits).
            whole, fraction = value.split('.', 1)
            replace['second'] = int(whole)
            replace['microsecond'] = int(fraction[:6].ljust(6, '0'))
        else:
            replace[attr] = int(value)
        resolution = attr

    return default.replace(**replace), resolution
61+
62+
63+
def _parsed_string_to_bounds(date_type, resolution, parsed):
64+
if resolution == 'year':
65+
return (date_type(parsed.year, 1, 1),
66+
date_type(parsed.year + 1, 1, 1) - timedelta(microseconds=1))
67+
if resolution == 'month':
68+
if parsed.month == 12:
69+
end = date_type(parsed.year + 1, 1, 1) - timedelta(microseconds=1)
70+
else:
71+
end = (date_type(parsed.year, parsed.month + 1, 1) -
72+
timedelta(microseconds=1))
73+
return date_type(parsed.year, parsed.month, 1), end
74+
if resolution == 'day':
75+
start = date_type(parsed.year, parsed.month, parsed.day)
76+
return start, start + timedelta(days=1, microseconds=-1)
77+
if resolution == 'hour':
78+
start = date_type(parsed.year, parsed.month, parsed.day, parsed.hour)
79+
return start, start + timedelta(hours=1, microseconds=-1)
80+
if resolution == 'minute':
81+
start = date_type(parsed.year, parsed.month, parsed.day, parsed.hour,
82+
parsed.minute)
83+
return start, start + timedelta(minutes=1, microseconds=-1)
84+
if resolution == 'second':
85+
start = date_type(parsed.year, parsed.month, parsed.day, parsed.hour,
86+
parsed.minute, parsed.second)
87+
return start, start + timedelta(seconds=1, microseconds=-1)
88+
else:
89+
raise KeyError
90+
91+
92+
def get_date_field(datetimes, field):
    """Collect the attribute named ``field`` from every item of
    ``datetimes`` and return the values as a list, in iteration order.
    """
    values = []
    for item in datetimes:
        values.append(getattr(item, field))
    return values
94+
95+
96+
def _field_accessor(name, docstring=None):
97+
def f(self):
98+
return get_date_field(self._data, name)
99+
100+
f.__name__ = name
101+
f.__doc__ = docstring
102+
return property(f)
103+
104+
105+
def get_date_type(self):
    """Return the type of the first element stored in ``self._data``."""
    first = self._data[0]
    return type(first)
107+
108+
109+
class NetCDFTimeIndex(pd.Index):
    """Index over an array of date objects supporting partial-string lookup.

    String keys are parsed as ISO-8601-style dates; a string that names
    only a year, month, day, ... selects every element falling inside
    that period.  The stored dates are kept in ``self._data`` as a NumPy
    array.
    """

    def __new__(cls, data):
        # NOTE(review): pd.Index construction is bypassed on purpose
        # here; only ``_data`` is initialised.
        result = object.__new__(cls)
        result._data = np.array(data)
        return result

    year = _field_accessor('year', 'The year of the datetime')
    month = _field_accessor('month', 'The month of the datetime')
    day = _field_accessor('day', 'The days of the datetime')
    hour = _field_accessor('hour', 'The hours of the datetime')
    minute = _field_accessor('minute', 'The minutes of the datetime')
    second = _field_accessor('second', 'The seconds of the datetime')
    microsecond = _field_accessor('microsecond',
                                  'The microseconds of the datetime')
    date_type = property(get_date_type)

    def _partial_date_slice(self, resolution, parsed,
                            use_lhs=True, use_rhs=True):
        """Return the integer positions of elements inside the period
        described by ``parsed`` at the given ``resolution``.

        ``use_lhs`` / ``use_rhs`` switch the lower / upper bound check
        on or off.
        """
        start, end = _parsed_string_to_bounds(self.date_type, resolution,
                                              parsed)
        # Start from an all-True array (not the scalar ``True``) so the
        # result supports ``.nonzero()`` even when both bounds are
        # disabled.
        mask = np.ones(self._data.shape, dtype=bool)
        if use_lhs:
            mask = mask & (self._data >= start)
        if use_rhs:
            mask = mask & (self._data <= end)
        return mask.nonzero()[0]

    def _get_string_slice(self, key, use_lhs=True, use_rhs=True):
        """Parse the string ``key`` and return the matching positions."""
        parsed, resolution = _parse_iso8601_with_reso(self.date_type, key)
        loc = self._partial_date_slice(resolution, parsed, use_lhs, use_rhs)
        return loc

    def get_loc(self, key, method=None, tolerance=None):
        """Return the location(s) of ``key``.

        String keys go through partial-date matching and return either a
        single integer position (exactly one match) or an array of
        positions.  Any other key is delegated to ``pd.Index.get_loc``.
        """
        if isinstance(key, pd.compat.string_types):
            result = self._get_string_slice(key)
            # Prevents problem with __contains__ if key corresponds to only
            # the first element in index (if we leave things as a list,
            # np.any([0]) is False).
            # Also coerces things to scalar coords in xarray if possible,
            # which is consistent with the behavior with a DatetimeIndex.
            if len(result) == 1:
                return result[0]
            else:
                return result
        else:
            return pd.Index.get_loc(self, key, method=method,
                                    tolerance=tolerance)

    def _maybe_cast_slice_bound(self, label, side, kind):
        """Turn a string slice bound into a concrete date.

        For a string ``label`` the start of its period is used for the
        ``'left'`` side and the end for the other side; the two are
        swapped when the index is non-empty and monotonic decreasing.
        Non-string labels are returned unchanged.  ``kind`` is unused.
        """
        if isinstance(label, pd.compat.string_types):
            parsed, resolution = _parse_iso8601_with_reso(self.date_type,
                                                          label)
            start, end = _parsed_string_to_bounds(self.date_type, resolution,
                                                  parsed)
            if self.is_monotonic_decreasing and len(self):
                return end if side == 'left' else start
            return start if side == 'left' else end
        else:
            return label

    # TODO: Add ability to use integer range outside of iloc?
    # e.g. series[1:5].
    def get_value(self, series, key):
        """Look up ``key`` in ``series`` positionally via this index."""
        if not isinstance(key, slice):
            return series.iloc[self.get_loc(key)]
        else:
            return series.iloc[self.slice_indexer(
                key.start, key.stop, key.step)]

    def __contains__(self, key):
        """Return whether ``key`` resolves to at least one element.

        Lookup failures (``KeyError``, ``TypeError``, ``ValueError``)
        are treated as "not contained".
        """
        try:
            result = self.get_loc(key)
            return (isscalar(result) or isinstance(result, slice) or
                    np.any(result))
        except (KeyError, TypeError, ValueError):
            return False

0 commit comments

Comments
 (0)