|
29 | 29 | "import numpy as np\n",
|
30 | 30 | "import pandas as pd\n",
|
31 | 31 | "import xarray as xr\n",
|
32 |
| - "from netCDF4 import num2date\n", |
33 | 32 | "import matplotlib.pyplot as plt "
|
34 | 33 | ]
|
35 | 34 | },
|
36 |
| - { |
37 |
| - "cell_type": "markdown", |
38 |
| - "metadata": {}, |
39 |
| - "source": [ |
40 |
| - "#### Some calendar information so we can support any netCDF calendar. " |
41 |
| - ] |
42 |
| - }, |
43 |
| - { |
44 |
| - "cell_type": "code", |
45 |
| - "execution_count": null, |
46 |
| - "metadata": { |
47 |
| - "ExecuteTime": { |
48 |
| - "end_time": "2018-11-28T20:51:35.991620Z", |
49 |
| - "start_time": "2018-11-28T20:51:35.960336Z" |
50 |
| - } |
51 |
| - }, |
52 |
| - "outputs": [], |
53 |
| - "source": [ |
54 |
| - "dpm = {'noleap': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", |
55 |
| - " '365_day': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", |
56 |
| - " 'standard': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", |
57 |
| - " 'gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", |
58 |
| - " 'proleptic_gregorian': [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", |
59 |
| - " 'all_leap': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", |
60 |
| - " '366_day': [0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31],\n", |
61 |
| - " '360_day': [0, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30]} " |
62 |
| - ] |
63 |
| - }, |
64 |
| - { |
65 |
| - "cell_type": "markdown", |
66 |
| - "metadata": {}, |
67 |
| - "source": [ |
68 |
| - "#### A few calendar functions to determine the number of days in each month\n", |
69 |
| - "If you were just using the standard calendar, it would be easy to use the `calendar.month_range` function." |
70 |
| - ] |
71 |
| - }, |
72 |
| - { |
73 |
| - "cell_type": "code", |
74 |
| - "execution_count": null, |
75 |
| - "metadata": { |
76 |
| - "ExecuteTime": { |
77 |
| - "end_time": "2018-11-28T20:51:36.015151Z", |
78 |
| - "start_time": "2018-11-28T20:51:35.994079Z" |
79 |
| - } |
80 |
| - }, |
81 |
| - "outputs": [], |
82 |
| - "source": [ |
83 |
| - "def leap_year(year, calendar='standard'):\n", |
84 |
| - " \"\"\"Determine if year is a leap year\"\"\"\n", |
85 |
| - " leap = False\n", |
86 |
| - " if ((calendar in ['standard', 'gregorian',\n", |
87 |
| - " 'proleptic_gregorian', 'julian']) and\n", |
88 |
| - " (year % 4 == 0)):\n", |
89 |
| - " leap = True\n", |
90 |
| - " if ((calendar == 'proleptic_gregorian') and\n", |
91 |
| - " (year % 100 == 0) and\n", |
92 |
| - " (year % 400 != 0)):\n", |
93 |
| - " leap = False\n", |
94 |
| - " elif ((calendar in ['standard', 'gregorian']) and\n", |
95 |
| - " (year % 100 == 0) and (year % 400 != 0) and\n", |
96 |
| - " (year < 1583)):\n", |
97 |
| - " leap = False\n", |
98 |
| - " return leap\n", |
99 |
| - "\n", |
100 |
| - "def get_dpm(time, calendar='standard'):\n", |
101 |
| - " \"\"\"\n", |
102 |
| - " return a array of days per month corresponding to the months provided in `months`\n", |
103 |
| - " \"\"\"\n", |
104 |
| - " month_length = np.zeros(len(time), dtype=np.int)\n", |
105 |
| - " \n", |
106 |
| - " cal_days = dpm[calendar]\n", |
107 |
| - " \n", |
108 |
| - " for i, (month, year) in enumerate(zip(time.month, time.year)):\n", |
109 |
| - " month_length[i] = cal_days[month]\n", |
110 |
| - " if leap_year(year, calendar=calendar) and month == 2:\n", |
111 |
| - " month_length[i] += 1\n", |
112 |
| - " return month_length" |
113 |
| - ] |
114 |
| - }, |
115 | 35 | {
|
116 | 36 | "cell_type": "markdown",
|
117 | 37 | "metadata": {},
|
|
131 | 51 | "outputs": [],
|
132 | 52 | "source": [
|
133 | 53 | "ds = xr.tutorial.open_dataset('rasm').load()\n",
|
134 |
| - "print(ds)" |
| 54 | + "ds" |
135 | 55 | ]
|
136 | 56 | },
|
137 | 57 | {
|
|
143 | 63 | "- calculate the month lengths for each monthly data record\n",
|
144 | 64 | "- calculate weights using `groupby('time.season')`\n",
|
145 | 65 | "\n",
|
146 |
| - "Finally, we just need to multiply our weights by the `Dataset` and sum allong the time dimension. " |
| 66 | + "Finally, we just need to multiply our weights by the `Dataset` and sum allong the time dimension. Creating a `DataArray` for the month length is as easy as using the `days_in_month` accessor on the time coordinate. The calendar type, in this case `'noleap'`, is automatically considered in this operation." |
| 67 | + ] |
| 68 | + }, |
| 69 | + { |
| 70 | + "cell_type": "code", |
| 71 | + "execution_count": null, |
| 72 | + "metadata": {}, |
| 73 | + "outputs": [], |
| 74 | + "source": [ |
| 75 | + "month_length = ds.time.dt.days_in_month\n", |
| 76 | + "month_length" |
147 | 77 | ]
|
148 | 78 | },
|
149 | 79 | {
|
|
157 | 87 | },
|
158 | 88 | "outputs": [],
|
159 | 89 | "source": [
|
160 |
| - "# Make a DataArray with the number of days in each month, size = len(time)\n", |
161 |
| - "month_length = xr.DataArray(get_dpm(ds.time.to_index(), calendar='noleap'),\n", |
162 |
| - " coords=[ds.time], name='month_length')\n", |
163 |
| - "\n", |
164 | 90 | "# Calculate the weights by grouping by 'time.season'.\n",
|
165 |
| - "# Conversion to float type ('astype(float)') only necessary for Python 2.x\n", |
166 |
| - "weights = month_length.groupby('time.season') / month_length.astype(float).groupby('time.season').sum()\n", |
| 91 | + "weights = month_length.groupby('time.season') / month_length.groupby('time.season').sum()\n", |
167 | 92 | "\n",
|
168 | 93 | "# Test that the sum of the weights for each season is 1.0\n",
|
169 | 94 | "np.testing.assert_allclose(weights.groupby('time.season').sum().values, np.ones(4))\n",
|
|
183 | 108 | },
|
184 | 109 | "outputs": [],
|
185 | 110 | "source": [
|
186 |
| - "print(ds_weighted)" |
| 111 | + "ds_weighted" |
187 | 112 | ]
|
188 | 113 | },
|
189 | 114 | {
|
|
262 | 187 | "source": [
|
263 | 188 | "# Wrap it into a simple function\n",
|
264 | 189 | "def season_mean(ds, calendar='standard'):\n",
|
265 |
| - " # Make a DataArray of season/year groups\n", |
266 |
| - " year_season = xr.DataArray(ds.time.to_index().to_period(freq='Q-NOV').to_timestamp(how='E'),\n", |
267 |
| - " coords=[ds.time], name='year_season')\n", |
268 |
| - "\n", |
269 | 190 | " # Make a DataArray with the number of days in each month, size = len(time)\n",
|
270 |
| - " month_length = xr.DataArray(get_dpm(ds.time.to_index(), calendar=calendar),\n", |
271 |
| - " coords=[ds.time], name='month_length')\n", |
| 191 | + " month_length = ds.time.dt.days_in_month\n", |
| 192 | + "\n", |
272 | 193 | " # Calculate the weights by grouping by 'time.season'\n",
|
273 | 194 | " weights = month_length.groupby('time.season') / month_length.groupby('time.season').sum()\n",
|
274 | 195 | "\n",
|
|
278 | 199 | " # Calculate the weighted average\n",
|
279 | 200 | " return (ds * weights).groupby('time.season').sum(dim='time')"
|
280 | 201 | ]
|
281 |
| - }, |
282 |
| - { |
283 |
| - "cell_type": "code", |
284 |
| - "execution_count": null, |
285 |
| - "metadata": {}, |
286 |
| - "outputs": [], |
287 |
| - "source": [] |
288 | 202 | }
|
289 | 203 | ],
|
290 | 204 | "metadata": {
|
|
304 | 218 | "name": "python",
|
305 | 219 | "nbconvert_exporter": "python",
|
306 | 220 | "pygments_lexer": "ipython3",
|
307 |
| - "version": "3.6.8" |
| 221 | + "version": "3.7.3" |
308 | 222 | },
|
309 | 223 | "toc": {
|
310 | 224 | "base_numbering": 1,
|
|
321 | 235 | }
|
322 | 236 | },
|
323 | 237 | "nbformat": 4,
|
324 |
| - "nbformat_minor": 2 |
| 238 | + "nbformat_minor": 4 |
325 | 239 | }
|
0 commit comments