From 0f16e2e2ff77530ce39bf37689f08267e9dcef17 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 30 Aug 2017 16:50:20 +0100 Subject: [PATCH 01/57] Added examples to pd.Index.get_loc --- pandas/core/indexes/base.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 31cf1e48b8529..b092b46e0899c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2450,7 +2450,21 @@ def _get_unique_index(self, dropna=False): Returns ------- - loc : int if unique index, possibly slice or mask if not + loc : int if unique index, slice if monotonic index, else mask + + Examples + --------- + >>> unique_index = pd.Index(list('abc')) + >>> unique_index.get_loc('b') + 1 + + >>> monotonic_index = pd.Index(list('abbc')) + >>> monotonic_index.get_loc('b') + slice(1, 3, None) + + >>> non_monotonic_index = pd.Index(list('abcb')) + >>> non_monotonic_index.get_loc('b') + array([False, True, False, True], dtype=bool) """ @Appender(_index_shared_docs['get_loc']) From 2c098cd4841dced394cfdf82e2a454c6a5773324 Mon Sep 17 00:00:00 2001 From: tp Date: Fri, 1 Sep 2017 13:28:46 +0100 Subject: [PATCH 02/57] use target_klass so class name is properly updated for subclasses --- pandas/core/indexes/base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b092b46e0899c..d02854b6a38c1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2454,20 +2454,20 @@ def _get_unique_index(self, dropna=False): Examples --------- - >>> unique_index = pd.Index(list('abc')) + >>> unique_index = pd.%(target_klass)s(list('abc')) >>> unique_index.get_loc('b') 1 - >>> monotonic_index = pd.Index(list('abbc')) + >>> monotonic_index = pd.%(target_klass)s(list('abbc')) >>> monotonic_index.get_loc('b') slice(1, 3, None) - >>> non_monotonic_index = pd.Index(list('abcb')) + >>> non_monotonic_index = pd.%(target_klass)s(list('abcb')) >>> non_monotonic_index.get_loc('b') array([False, True, False, True], dtype=bool) """ - @Appender(_index_shared_docs['get_loc']) + @Appender(_index_shared_docs['get_loc'] % _index_doc_kwargs) def get_loc(self, key, method=None, tolerance=None): if method is None: if tolerance is not None: From 9e425d637b0c635f1ec73407e6b45d1c53cd7fca Mon Sep 17 00:00:00 2001 From: jschendel Date: Fri, 1 Sep 2017 08:52:44 -0600 Subject: [PATCH 03/57] TST: Enable tests in test_tools.py (#17405) Enabled tests that currently aren't running. Small fix to make sure all tests pass. Verified that the raised messages match expectations for TestToDatetimeUnit::test_frame. 
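The "small fix" mentioned above is the ``str.format`` call in ``pandas/core/tools/datetimes.py`` shown in the diff below: the keyword argument was bound inside ``str.join`` instead of in ``format``. A minimal standalone sketch of the bug and the corrected call, using a hypothetical ``excess`` list:

    excess = ['foo', 'bar']  # hypothetical extra keys

    # Buggy form (pre-patch): str.join() accepts no keyword arguments,
    # so this raises TypeError before format() ever runs.
    # "[{excess}]".format(','.join(excess=excess))

    # Fixed form (this patch): bind the keyword in format() instead.
    print("[{excess}]".format(excess=','.join(excess)))  # -> [foo,bar]
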
--- pandas/core/tools/datetimes.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 24 +++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 9ff0275a7c370..9dde26f43ad33 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -605,7 +605,7 @@ def f(value): if len(excess): raise ValueError("extra keys have been passed " "to the datetime assemblage: " - "[{excess}]".format(','.join(excess=excess))) + "[{excess}]".format(excess=','.join(excess))) def coerce(values): # we allow coercion to if errors allows diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 089d74a1d69b8..5152c1019d8de 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -25,7 +25,7 @@ compat) -class TimeConversionFormats(object): +class TestTimeConversionFormats(object): def test_to_datetime_format(self): values = ['1/1/2000', '1/2/2000', '1/3/2000'] @@ -372,7 +372,7 @@ def test_datetime_invalid_datatype(self): pd.to_datetime(pd.to_datetime) -class ToDatetimeUnit(object): +class TestToDatetimeUnit(object): def test_unit(self): # GH 11758 @@ -566,7 +566,10 @@ def test_dataframe(self): df2 = DataFrame({'year': [2015, 2016], 'month': [2, 20], 'day': [4, 5]}) - with pytest.raises(ValueError): + + msg = ("cannot assemble the datetimes: time data .+ does not " + "match format '%Y%m%d' \(match\)") + with tm.assert_raises_regex(ValueError, msg): to_datetime(df2) result = to_datetime(df2, errors='coerce') expected = Series([Timestamp('20150204 00:00:00'), @@ -574,26 +577,31 @@ def test_dataframe(self): assert_series_equal(result, expected) # extra columns - with pytest.raises(ValueError): + msg = ("extra keys have been passed to the datetime assemblage: " + "\[foo\]") + with tm.assert_raises_regex(ValueError, msg): df2 = df.copy() df2['foo'] = 1 to_datetime(df2) # not enough + msg = ('to assemble mappings requires at least that \[year, month, ' + 'day\] be specified: \[.+\] is missing') for c in [['year'], ['year', 'month'], ['year', 'month', 'second'], ['month', 'day'], ['year', 'day', 'second']]: - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): to_datetime(df[c]) # duplicates + msg = 'cannot assemble with duplicate keys' df2 = DataFrame({'year': [2015, 2016], 'month': [2, 20], 'day': [4, 5]}) df2.columns = ['year', 'year', 'day'] - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): to_datetime(df2) df2 = DataFrame({'year': [2015, 2016], @@ -601,7 +609,7 @@ def test_dataframe(self): 'day': [4, 5], 'hour': [4, 5]}) df2.columns = ['year', 'month', 'day', 'day'] - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): to_datetime(df2) def test_dataframe_dtypes(self): @@ -632,7 +640,7 @@ def test_dataframe_dtypes(self): to_datetime(df) -class ToDatetimeMisc(object): +class TestToDatetimeMisc(object): def test_index_to_datetime(self): idx = Index(['1/1/2000', '1/2/2000', '1/3/2000']) From f7fe4295f84937bc0fa82c9718e62ec19fc36e6a Mon Sep 17 00:00:00 2001 From: topper-123 Date: Fri, 1 Sep 2017 17:36:00 +0100 Subject: [PATCH 04/57] TST: remove tests and docs for legacy (pre 0.12) hdf5 support (#17404) --- doc/source/io.rst | 38 ------------------ doc/source/whatsnew/v0.21.0.txt | 2 + .../tests/io/data/legacy_hdf/legacy_0.10.h5 | Bin 238321 -> 0 bytes .../io/data/legacy_hdf/legacy_table_0.11.h5 | Bin 293877 -> 
0 bytes pandas/tests/io/test_pytables.py | 34 +--------------- 5 files changed, 3 insertions(+), 71 deletions(-) delete mode 100644 pandas/tests/io/data/legacy_hdf/legacy_0.10.h5 delete mode 100644 pandas/tests/io/data/legacy_hdf/legacy_table_0.11.h5 diff --git a/doc/source/io.rst b/doc/source/io.rst index e338407361705..f55c72bae5a20 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4419,44 +4419,6 @@ Now you can import the ``DataFrame`` into R: starting point if you have stored multiple ``DataFrame`` objects to a single HDF5 file. -Backwards Compatibility -''''''''''''''''''''''' - -0.10.1 of ``HDFStore`` can read tables created in a prior version of pandas, -however query terms using the -prior (undocumented) methodology are unsupported. ``HDFStore`` will -issue a warning if you try to use a legacy-format file. You must -read in the entire file and write it out using the new format, using the -method ``copy`` to take advantage of the updates. The group attribute -``pandas_version`` contains the version information. ``copy`` takes a -number of options, please see the docstring. - - -.. ipython:: python - :suppress: - - import os - legacy_file_path = os.path.abspath('source/_static/legacy_0.10.h5') - -.. ipython:: python - :okwarning: - - # a legacy store - legacy_store = pd.HDFStore(legacy_file_path,'r') - legacy_store - - # copy (and return the new handle) - new_store = legacy_store.copy('store_new.h5') - new_store - new_store.close() - -.. ipython:: python - :suppress: - - legacy_store.close() - import os - os.remove('store_new.h5') - Performance ''''''''''' diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index e0963a1908bbc..81e52266f972e 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -371,6 +371,8 @@ Removal of prior version deprecations/changes - ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`) - :func:`eval` and :func:`DataFrame.eval` have changed the default of ``inplace`` from ``None`` to ``False`` (:issue:`11149`) - The function ``get_offset_name`` has been dropped in favor of the ``.freqstr`` attribute for an offset (:issue:`11834`) +- pandas no longer tests for compatibility with hdf5-files created with pandas < 0.11 (:issue:`17404`). + .. 
_whatsnew_0210.performance:

diff --git a/pandas/tests/io/data/legacy_hdf/legacy_0.10.h5 b/pandas/tests/io/data/legacy_hdf/legacy_0.10.h5
deleted file mode 100644
index b1439ef16361abbc0756fbf7d344fd65d8a1a473..0000000000000000000000000000000000000000
Binary files a/pandas/tests/io/data/legacy_hdf/legacy_0.10.h5 and /dev/null differ
diff --git a/pandas/tests/io/data/legacy_hdf/legacy_table_0.11.h5 b/pandas/tests/io/data/legacy_hdf/legacy_table_0.11.h5
deleted file mode 100644
Binary files a/pandas/tests/io/data/legacy_hdf/legacy_table_0.11.h5 and /dev/null differ
diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index f33ba7627101e..b5ecc4d34cd08 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4599,41 +4599,13 @@ def test_legacy_table_read(self): expected = df2[df2.index > df2.index[2]] assert_frame_equal(expected, result) - def test_legacy_0_10_read(self): - # legacy from 0.10 - with catch_warnings(record=True): - path = tm.get_data_path('legacy_hdf/legacy_0.10.h5') - with ensure_clean_store(path, mode='r') as store: - str(store) - for k in store.keys(): - store.select(k) - - def test_legacy_0_11_read(self): - # legacy from 0.11 - path = os.path.join('legacy_hdf', 'legacy_table_0.11.h5') - with ensure_clean_store(tm.get_data_path(path), mode='r') as store: - str(store) - assert 'df' in store - assert 'df1' in store - assert 'mi' in store - df = store.select('df') - df1 = store.select('df1') - mi = store.select('mi') - assert isinstance(df, DataFrame) - assert isinstance(df1, DataFrame) - assert isinstance(mi, DataFrame) - def test_copy(self): with catch_warnings(record=True): - def do_copy(f=None, new_f=None, keys=None, + def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): try: - if f is None: - f = tm.get_data_path(os.path.join('legacy_hdf', - 'legacy_0.10.h5')) - store = HDFStore(f, 'r') if new_f is None: @@ -4671,10 +4643,6 @@ def do_copy(f=None, new_f=None, keys=None, pass safe_remove(new_f) - do_copy() - do_copy(keys=['/a', '/b', '/df1_mixed']) - do_copy(propindexes=False) - # new table df = tm.makeDataFrame() From 8351f86a0079b6b0cb95414807a2c2248530ef2c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 1 Sep 2017 10:11:40 -0700 Subject: [PATCH 05/57] Tslib unused (#17402) --- pandas/_libs/tslib.pyx | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 5dd30072fb7aa..50e0b77c6d3a0 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -2622,8 +2622,6 @@ cdef class _Timedelta(timedelta): int ndim if isinstance(other, _Timedelta): - if isinstance(other, _NaT): - return _cmp_nat_dt(other, self, _reverse_ops[op]) ots = other elif isinstance(other, timedelta): ots = Timedelta(other) @@ -3882,7 +3880,7 @@ fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond', 'nanosecond', 'week', 'dayofyear', 'weekofyear', 'days_in_month', 'daysinmonth', 'dayofweek', 'weekday_name', 'days', 'seconds', 'microseconds', - 'nanoseconds', 'qyear', 'quarter'] + 'nanoseconds', 'qyear'] for field in fields: prop = property(fget=lambda self: np.nan) setattr(NaTType, field, prop) @@ -4620,7 +4618,6 @@ def build_field_sarray(ndarray[int64_t] dtindex): """ cdef: Py_ssize_t i, count = 0 - int isleap pandas_datetimestruct dts ndarray[int32_t] years, months, days, hours, minutes, seconds, mus @@ -5270,7 +5267,6 @@ cpdef _isleapyear_arr(ndarray years): def monthrange(int64_t year, int64_t month): cdef: int64_t days - int64_t day_of_week if month < 1 or month > 12: raise ValueError("bad month number 0; must be 1-12") From 1981b679b0619de0765c2009684ce4abd886189d Mon Sep 17 00:00:00 2001 From: topper-123 Date: Sat, 2 Sep 2017 12:50:55 +0100 Subject: [PATCH 06/57] DOC: Cleaned references to pandas ` .. 
ipython:: python diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 3c6572229802d..4652ccbf0ad34 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -73,7 +73,7 @@ index is passed, one will be created having values ``[0, ..., len(data) - 1]``. .. note:: - Starting in v0.8.0, pandas supports non-unique index values. If an operation + pandas supports non-unique index values. If an operation that does not support duplicate index values is attempted, an exception will be raised at that time. The reason for being lazy is nearly all performance-based (there are many instances in computations, like parts of GroupBy, where the index @@ -698,7 +698,7 @@ DataFrame in tabular form, though it won't always fit the console width: print(baseball.iloc[-20:, :12].to_string()) -New since 0.10.0, wide DataFrames will now be printed across multiple rows by +Wide DataFrames will be printed across multiple rows by default: .. ipython:: python @@ -845,19 +845,16 @@ DataFrame objects with mixed-type columns, all of the data will get upcasted to .. note:: - Unfortunately Panel, being less commonly used than Series and DataFrame, + Panel, being less commonly used than Series and DataFrame, has been slightly neglected feature-wise. A number of methods and options - available in DataFrame are not available in Panel. This will get worked - on, of course, in future releases. And faster if you join me in working on - the codebase. + available in DataFrame are not available in Panel. .. _dsintro.to_panel: From DataFrame using ``to_panel`` method ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This method was introduced in v0.7 to replace ``LongPanel.to_long``, and converts -a DataFrame with a two-level index to a Panel. +``to_panel`` converts a DataFrame with a two-level index to a Panel. .. ipython:: python :okwarning: diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 937d682d238b3..53c0b771555f8 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -140,7 +140,7 @@ columns: In [5]: grouped = df.groupby(get_letter_type, axis=1) -Starting with 0.8, pandas Index objects now support duplicate values. If a +pandas Index objects support duplicate values. If a non-unique index is used as the group key in a groupby operation, all values for the same index value will be considered to be in one group and thus the output of aggregation functions will only contain unique index values: @@ -288,8 +288,6 @@ chosen level: s.sum(level='second') -.. versionadded:: 0.6 - Grouping with multiple levels is supported. .. ipython:: python diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 53a259ad6eb15..4687e46490562 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -66,8 +66,6 @@ See the :ref:`cookbook` for some advanced strategies Different Choices for Indexing ------------------------------ -.. versionadded:: 0.11.0 - Object selection has had a number of user-requested additions in order to support more explicit location based indexing. Pandas now supports three types of multi-axis indexing. diff --git a/doc/source/io.rst b/doc/source/io.rst index f55c72bae5a20..f68358764a40e 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -364,7 +364,7 @@ warn_bad_lines : boolean, default ``True`` Specifying column data types '''''''''''''''''''''''''''' -Starting with v0.10, you can indicate the data type for the whole DataFrame or +You can indicate the data type for the whole DataFrame or individual columns: .. 
ipython:: python @@ -3346,7 +3346,7 @@ Read/Write API '''''''''''''' ``HDFStore`` supports an top-level API using ``read_hdf`` for reading and ``to_hdf`` for writing, -similar to how ``read_csv`` and ``to_csv`` work. (new in 0.11.0) +similar to how ``read_csv`` and ``to_csv`` work. .. ipython:: python @@ -3791,7 +3791,7 @@ indexed dimension as the ``where``. .. note:: - Indexes are automagically created (starting ``0.10.1``) on the indexables + Indexes are automagically created on the indexables and any data columns you specify. This behavior can be turned off by passing ``index=False`` to ``append``. @@ -3878,7 +3878,7 @@ create a new table!) Iterator ++++++++ -Starting in ``0.11.0``, you can pass, ``iterator=True`` or ``chunksize=number_in_a_chunk`` +You can pass ``iterator=True`` or ``chunksize=number_in_a_chunk`` to ``select`` and ``select_as_multiple`` to return an iterator on the results. The default is 50,000 rows returned in a chunk. @@ -3986,8 +3986,8 @@ of rows in an object. Multiple Table Queries ++++++++++++++++++++++ -New in 0.10.1 are the methods ``append_to_multiple`` and -``select_as_multiple``, that can perform appending/selecting from +The methods ``append_to_multiple`` and +``select_as_multiple`` can perform appending/selecting from multiple tables at once. The idea is to have one table (call it the selector table) that you index most/all of the columns, and perform your queries. The other table(s) are data tables with an index matching the @@ -4291,7 +4291,7 @@ Pass ``min_itemsize`` on the first table creation to a-priori specify the minimu ``min_itemsize`` can be an integer, or a dict mapping a column name to an integer. You can pass ``values`` as a key to allow all *indexables* or *data_columns* to have this min_itemsize. -Starting in 0.11.0, passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically. +Passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically. .. note:: diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index d54288baa389b..64a321d67a825 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -67,9 +67,8 @@ arise and we wish to also consider that "missing" or "not available" or "NA". .. note:: - Prior to version v0.10.0 ``inf`` and ``-inf`` were also - considered to be "NA" in computations. This is no longer the case by - default; use the ``mode.use_inf_as_na`` option to recover it. + If you want to consider ``inf`` and ``-inf`` to be "NA" in computations, + you can set ``pandas.options.mode.use_inf_as_na = True``. .. _missing.isna: @@ -485,8 +484,8 @@ respectively: Replacing Generic Values ~~~~~~~~~~~~~~~~~~~~~~~~ -Often times we want to replace arbitrary values with other values. New in v0.8 -is the ``replace`` method in Series/DataFrame that provides an efficient yet +Often times we want to replace arbitrary values with other values. The +``replace`` method in Series/DataFrame provides an efficient yet flexible way to perform such replacements. For a Series, you can replace a single value or a list of values by another diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index ce4a920ad77b5..aded5e4402df2 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1069,8 +1069,7 @@ Offset Aliases ~~~~~~~~~~~~~~ A number of string aliases are given to useful common time series -frequencies. 
We will refer to these aliases as *offset aliases* -(referred to as *time rules* prior to v0.8.0). +frequencies. We will refer to these aliases as *offset aliases*. .. csv-table:: :header: "Alias", "Description" diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index fb799c642131d..c637246537ca1 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -306,8 +306,6 @@ subplots: df.diff().hist(color='k', alpha=0.5, bins=50) -.. versionadded:: 0.10.0 - The ``by`` keyword can be specified to plot grouped histograms: .. ipython:: python @@ -831,8 +829,6 @@ and take a :class:`Series` or :class:`DataFrame` as an argument. Scatter Matrix Plot ~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.7.3 - You can create a scatter plot matrix using the ``scatter_matrix`` method in ``pandas.plotting``: @@ -859,8 +855,6 @@ You can create a scatter plot matrix using the Density Plot ~~~~~~~~~~~~ -.. versionadded:: 0.8.0 - You can create density plots using the :meth:`Series.plot.kde` and :meth:`DataFrame.plot.kde` methods. .. ipython:: python From c2d048137c7288644e8276fed3c5a7071a80221e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 4 Sep 2017 16:32:34 -0700 Subject: [PATCH 07/57] Remove unused _day and _month attrs (#17431) closes #17429 --- pandas/_libs/tslib.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 50e0b77c6d3a0..8fbc606ccdfe2 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -829,8 +829,6 @@ class NaTType(_NaT): cdef _NaT base base = _NaT.__new__(cls, 1, 1, 1) - base._day = -1 - base._month = -1 base.value = NPY_NAT return base From 5bca6ce860f66ca6f92327086a954b9e0326a85f Mon Sep 17 00:00:00 2001 From: topper-123 Date: Tue, 5 Sep 2017 11:30:31 +0100 Subject: [PATCH 08/57] DOC: Clean-up references to v12 to v14 (both included) (#17420) --- doc/source/advanced.rst | 21 ++--------- doc/source/basics.rst | 10 +----- doc/source/comparison_with_r.rst | 4 --- doc/source/cookbook.rst | 2 +- doc/source/enhancingperf.rst | 36 ++++++------------- doc/source/groupby.rst | 19 ---------- doc/source/indexing.rst | 23 ++---------- doc/source/install.rst | 2 +- doc/source/io.rst | 61 +++++++++----------------------- doc/source/merging.rst | 2 -- doc/source/missing_data.rst | 9 ----- doc/source/options.rst | 2 +- doc/source/text.rst | 2 -- doc/source/timedeltas.rst | 2 -- doc/source/timeseries.rst | 10 +++--- doc/source/visualization.rst | 16 --------- 16 files changed, 43 insertions(+), 178 deletions(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 711c3e9a95d05..4af476cd5a7e1 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -270,9 +270,6 @@ Passing a list of labels or tuples works similar to reindexing: Using slicers ~~~~~~~~~~~~~ -.. versionadded:: 0.14.0 - -In 0.14.0 we added a new way to slice multi-indexed objects. You can slice a multi-index by providing multiple indexers. You can provide any of the selectors as if you are indexing by label, see :ref:`Selection by Label `, @@ -384,7 +381,7 @@ selecting data at a particular level of a MultiIndex easier. .. ipython:: python - # using the slicers (new in 0.14.0) + # using the slicers df.loc[(slice(None),'one'),:] You can also select on the columns with :meth:`~pandas.MultiIndex.xs`, by @@ -397,7 +394,7 @@ providing the axis argument .. 
ipython:: python - # using the slicers (new in 0.14.0) + # using the slicers df.loc[:,(slice(None),'one')] :meth:`~pandas.MultiIndex.xs` also allows selection with multiple keys @@ -408,11 +405,9 @@ providing the axis argument .. ipython:: python - # using the slicers (new in 0.14.0) + # using the slicers df.loc[:,('bar','one')] -.. versionadded:: 0.13.0 - You can pass ``drop_level=False`` to :meth:`~pandas.MultiIndex.xs` to retain the level that was selected @@ -743,16 +738,6 @@ Prior to 0.18.0, the ``Int64Index`` would provide the default index for all ``ND Float64Index ~~~~~~~~~~~~ -.. note:: - - As of 0.14.0, ``Float64Index`` is backed by a native ``float64`` dtype - array. Prior to 0.14.0, ``Float64Index`` was backed by an ``object`` dtype - array. Using a ``float64`` dtype in the backend speeds up arithmetic - operations by about 30x and boolean indexing operations on the - ``Float64Index`` itself are about 2x as fast. - -.. versionadded:: 0.13.0 - By default a ``Float64Index`` will be automatically created when passing floating, or mixed-integer-floating values in index creation. This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the same. diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 35eb14eda238f..5880703b1d271 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -347,7 +347,7 @@ That is because NaNs do not compare as equals: np.nan == np.nan -So, as of v0.13.1, NDFrames (such as Series, DataFrames, and Panels) +So, NDFrames (such as Series, DataFrames, and Panels) have an :meth:`~DataFrame.equals` method for testing equality, with NaNs in corresponding locations treated as equal. @@ -1104,10 +1104,6 @@ Applying with a ``Panel`` will pass a ``Series`` to the applied function. If the function returns a ``Series``, the result of the application will be a ``Panel``. If the applied function reduces to a scalar, the result of the application will be a ``DataFrame``. -.. note:: - - Prior to 0.13.1 ``apply`` on a ``Panel`` would only work on ``ufuncs`` (e.g. ``np.sum/np.max``). - .. ipython:: python import pandas.util.testing as tm @@ -1800,8 +1796,6 @@ Series has the :meth:`~Series.searchsorted` method, which works similar to smallest / largest values ~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14.0 - ``Series`` has the :meth:`~Series.nsmallest` and :meth:`~Series.nlargest` methods which return the smallest or largest :math:`n` values. For a large ``Series`` this can be much faster than sorting the entire Series and calling ``head(n)`` on the result. @@ -2168,8 +2162,6 @@ Selecting columns based on ``dtype`` .. _basics.selectdtypes: -.. versionadded:: 0.14.1 - The :meth:`~DataFrame.select_dtypes` method implements subsetting of columns based on their ``dtype``. diff --git a/doc/source/comparison_with_r.rst b/doc/source/comparison_with_r.rst index 194e022e34c7c..f895cdc25e620 100644 --- a/doc/source/comparison_with_r.rst +++ b/doc/source/comparison_with_r.rst @@ -247,8 +247,6 @@ For more details and examples see :ref:`the reshaping documentation |subset|_ ~~~~~~~~~~ -.. versionadded:: 0.13 - The :meth:`~pandas.DataFrame.query` method is similar to the base R ``subset`` function. In R you might want to get the rows of a ``data.frame`` where one column's values are less than another column's values: @@ -277,8 +275,6 @@ For more details and examples see :ref:`the query documentation |with|_ ~~~~~~~~ -.. 
versionadded:: 0.13 - An expression using a data.frame called ``df`` in R with the columns ``a`` and ``b`` would be evaluated using ``with`` like so: diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 32e7a616fe856..f51c3e679b36f 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -818,7 +818,7 @@ The :ref:`Concat ` docs. The :ref:`Join ` d df1 = pd.DataFrame(np.random.randn(6, 3), index=rng, columns=['A', 'B', 'C']) df2 = df1.copy() -ignore_index is needed in pandas < v0.13, and depending on df construction +Depending on df construction, ``ignore_index`` may be needed .. ipython:: python diff --git a/doc/source/enhancingperf.rst b/doc/source/enhancingperf.rst index 685a8690a53d5..264bd1de1fc77 100644 --- a/doc/source/enhancingperf.rst +++ b/doc/source/enhancingperf.rst @@ -213,17 +213,18 @@ the rows, applying our ``integrate_f_typed``, and putting this in the zeros arra .. warning:: - In 0.13.0 since ``Series`` has internaly been refactored to no longer sub-class ``ndarray`` - but instead subclass ``NDFrame``, you can **not pass** a ``Series`` directly as a ``ndarray`` typed parameter - to a cython function. Instead pass the actual ``ndarray`` using the ``.values`` attribute of the Series. + You can **not pass** a ``Series`` directly as a ``ndarray`` typed parameter + to a cython function. Instead pass the actual ``ndarray`` using the + ``.values`` attribute of the Series. The reason is that the cython + definition is specific to an ndarray and not the passed Series. - Prior to 0.13.0 + So, do not do this: .. code-block:: python apply_integrate_f(df['a'], df['b'], df['N']) - Use ``.values`` to get the underlying ``ndarray`` + But rather, use ``.values`` to get the underlying ``ndarray`` .. code-block:: python @@ -399,10 +400,8 @@ Read more in the `numba docs `__. .. _enhancingperf.eval: -Expression Evaluation via :func:`~pandas.eval` (Experimental) -------------------------------------------------------------- - -.. versionadded:: 0.13 +Expression Evaluation via :func:`~pandas.eval` +----------------------------------------------- The top-level function :func:`pandas.eval` implements expression evaluation of :class:`~pandas.Series` and :class:`~pandas.DataFrame` objects. @@ -539,10 +538,8 @@ Now let's do the same thing but with comparisons: of type ``bool`` or ``np.bool_``. Again, you should perform these kinds of operations in plain Python. -The ``DataFrame.eval`` method (Experimental) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. versionadded:: 0.13 +The ``DataFrame.eval`` method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In addition to the top level :func:`pandas.eval` function you can also evaluate an expression in the "context" of a :class:`~pandas.DataFrame`. @@ -646,19 +643,6 @@ whether the query modifies the original frame. Local Variables ~~~~~~~~~~~~~~~ -In pandas version 0.14 the local variable API has changed. In pandas 0.13.x, -you could refer to local variables the same way you would in standard Python. -For example, - -.. code-block:: python - - df = pd.DataFrame(np.random.randn(5, 2), columns=['a', 'b']) - newcol = np.random.randn(len(df)) - df.eval('b + newcol') - - UndefinedVariableError: name 'newcol' is not defined - -As you can see from the exception generated, this syntax is no longer allowed. You must *explicitly reference* any local variable that you want to use in an expression by placing the ``@`` character in front of the name. 
For example, diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 53c0b771555f8..e1231b9a4a200 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -766,8 +766,6 @@ missing values with the ``ffill()`` method. Filtration ---------- -.. versionadded:: 0.12 - The ``filter`` method returns a subset of the original object. Suppose we want to take only elements that belong to groups with a group sum greater than 2. @@ -858,8 +856,6 @@ In this example, we chopped the collection of time series into yearly chunks then independently called :ref:`fillna ` on the groups. -.. versionadded:: 0.14.1 - The ``nlargest`` and ``nsmallest`` methods work on ``Series`` style groupbys: .. ipython:: python @@ -1048,19 +1044,6 @@ Just like for a DataFrame or Series you can call head and tail on a groupby: This shows the first or last n rows from each group. -.. warning:: - - Before 0.14.0 this was implemented with a fall-through apply, - so the result would incorrectly respect the as_index flag: - - .. code-block:: python - - >>> g.head(1): # was equivalent to g.apply(lambda x: x.head(1)) - A B - A - 1 0 1 2 - 5 2 5 6 - .. _groupby.nth: Taking the nth row of each group @@ -1113,8 +1096,6 @@ You can also select multiple rows from each group by specifying multiple nth val Enumerate group items ~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.13.0 - To see the order in which each row appears within its group, use the ``cumcount`` method: diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 4687e46490562..a6e7df57be4e5 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -248,8 +248,6 @@ as an attribute: - In any of these cases, standard indexing will still work, e.g. ``s['1']``, ``s['min']``, and ``s['index']`` will access the corresponding element or column. - - The ``Series/Panel`` accesses are available starting in 0.13.0. - If you are using the IPython environment, you may also use tab-completion to see these accessible attributes. @@ -529,7 +527,6 @@ Out of range slice indexes are handled gracefully just as in Python/Numpy. .. ipython:: python # these are allowed in python/numpy. - # Only works in Pandas starting from v0.14.0. x = list('abcdef') x x[4:10] @@ -539,14 +536,8 @@ Out of range slice indexes are handled gracefully just as in Python/Numpy. s.iloc[4:10] s.iloc[8:10] -.. note:: - - Prior to v0.14.0, ``iloc`` would not accept out of bounds indexers for - slices, e.g. a value that exceeds the length of the object being indexed. - - -Note that this could result in an empty axis (e.g. an empty DataFrame being -returned) +Note that using slices that go out of bounds can result in +an empty axis (e.g. an empty DataFrame being returned) .. ipython:: python @@ -745,8 +736,6 @@ Finally, one can also set a seed for ``sample``'s random number generator using Setting With Enlargement ------------------------ -.. versionadded:: 0.13 - The ``.loc/[]`` operations can perform enlargement when setting a non-existant key for that axis. In the ``Series`` case this is effectively an appending operation @@ -1020,8 +1009,6 @@ partial setting via ``.loc`` (but on the contents rather than the axis labels) df2[ df2[1:4] > 0 ] = 3 df2 -.. versionadded:: 0.13 - Where can also accept ``axis`` and ``level`` parameters to align the input when performing the ``where``. @@ -1064,8 +1051,6 @@ as condition and ``other`` argument. The :meth:`~pandas.DataFrame.query` Method (Experimental) --------------------------------------------------------- -.. 
versionadded:: 0.13 - :class:`~pandas.DataFrame` objects have a :meth:`~pandas.DataFrame.query` method that allows selection using an expression. @@ -1506,8 +1491,6 @@ The name, if set, will be shown in the console display: Setting metadata ~~~~~~~~~~~~~~~~ -.. versionadded:: 0.13.0 - Indexes are "mostly immutable", but it is possible to set and change their metadata, like the index ``name`` (or, for ``MultiIndex``, ``levels`` and ``labels``). @@ -1790,7 +1773,7 @@ Evaluation order matters Furthermore, in chained expressions, the order may determine whether a copy is returned or not. If an expression will set values on a copy of a slice, then a ``SettingWithCopy`` -exception will be raised (this raise/warn behavior is new starting in 0.13.0) +warning will be issued. You can control the action of a chained assignment via the option ``mode.chained_assignment``, which can take the values ``['raise','warn',None]``, where showing a warning is the default. diff --git a/doc/source/install.rst b/doc/source/install.rst index f92c43839ee31..8dc8224ea6cb2 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -107,7 +107,7 @@ following command:: To install a specific pandas version:: - conda install pandas=0.13.1 + conda install pandas=0.20.3 To install other packages, IPython for example:: diff --git a/doc/source/io.rst b/doc/source/io.rst index f68358764a40e..33523ea171f3a 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1310,8 +1310,6 @@ column widths for contiguous columns: The parser will take care of extra white spaces around the columns so it's ok to have extra separation between the columns in the file. -.. versionadded:: 0.13.0 - By default, ``read_fwf`` will try to infer the file's ``colspecs`` by using the first 100 rows of the file. It can do it only in cases when the columns are aligned and correctly separated by the provided ``delimiter`` (default delimiter @@ -1407,8 +1405,7 @@ Reading columns with a ``MultiIndex`` By specifying list of row locations for the ``header`` argument, you can read in a ``MultiIndex`` for the columns. Specifying non-consecutive -rows will skip the intervening rows. In order to have the pre-0.13 behavior -of tupleizing columns, specify ``tupleize_cols=True``. +rows will skip the intervening rows. .. ipython:: python @@ -1418,7 +1415,7 @@ of tupleizing columns, specify ``tupleize_cols=True``. print(open('mi.csv').read()) pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1]) -Starting in 0.13.0, ``read_csv`` will be able to interpret a more common format +``read_csv`` is also able to interpret a more common format of multi-columns indices. .. ipython:: python @@ -2012,8 +2009,6 @@ The speedup is less noticeable for smaller datasets: Normalization ''''''''''''' -.. versionadded:: 0.13.0 - pandas provides a utility function to take a dict or list of dicts and *normalize* this semi-structured data into a flat table. @@ -2198,8 +2193,6 @@ Reading HTML Content We **highly encourage** you to read the :ref:`HTML Table Parsing gotchas ` below regarding the issues surrounding the BeautifulSoup4/html5lib/lxml parsers. -.. versionadded:: 0.12.0 - The top-level :func:`~pandas.io.html.read_html` function can accept an HTML string/file/URL and will parse HTML tables into list of pandas DataFrames. Let's look at a few examples. 
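A minimal sketch of such a ``read_html`` call (the URL below is a placeholder, and one of the supported parser backends — lxml, html5lib, or BeautifulSoup4 — is assumed to be installed):

.. code-block:: python

   import pandas as pd

   # placeholder URL; any page containing at least one HTML table works
   url = 'https://example.com/page_with_tables.html'

   # read_html returns a *list* of DataFrames, one per table found,
   # so the result is indexed rather than used directly
   dfs = pd.read_html(url)
   first_table = dfs[0]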
@@ -2653,10 +2646,6 @@ of sheet names can simply be passed to ``read_excel`` with no loss in performanc
    # equivalent using the read_excel function
    data = read_excel('path_to_file.xls', ['Sheet1', 'Sheet2'], index_col=None, na_values=['NA'])

-.. versionadded:: 0.12
-
-``ExcelFile`` has been moved to the top level namespace.
-
.. versionadded:: 0.17

``read_excel`` can take an ``ExcelFile`` object as input
@@ -2716,9 +2705,6 @@ Using a list to get multiple sheets:
``read_excel`` can read more than one sheet, by setting ``sheet_name`` to either
a list of sheet names, a list of sheet positions, or ``None`` to read all sheets.
-
-.. versionadded:: 0.13
-
Sheets can be specified by sheet index or sheet name, using an integer or string,
respectively.

@@ -2866,9 +2852,9 @@ Files with a ``.xls`` extension will be written using ``xlwt`` and those with a
``.xlsx`` extension will be written using ``xlsxwriter`` (if available) or
``openpyxl``.

-The DataFrame will be written in a way that tries to mimic the REPL output. One
-difference from 0.12.0 is that the ``index_label`` will be placed in the second
-row instead of the first. You can get the previous behaviour by setting the
+The DataFrame will be written in a way that tries to mimic the REPL output.
+The ``index_label`` will be placed in the second
+row instead of the first. You can place it in the first row by setting the
``merge_cells`` option in ``to_excel()`` to ``False``:

.. code-block:: python

@@ -2945,8 +2931,6 @@ Added support for Openpyxl >= 2.2
Excel writer engines
''''''''''''''''''''

-.. versionadded:: 0.13
-
``pandas`` chooses an Excel writer via two methods:

1. the ``engine`` keyword argument

@@ -3074,14 +3058,19 @@ any pickled pandas object (or any other pickled object) from file:

    Loading pickled data received from untrusted sources can be unsafe.

-    See: http://docs.python.org/2.7/library/pickle.html
+    See: https://docs.python.org/3.6/library/pickle.html

.. warning::

-    Several internal refactorings, 0.13 (:ref:`Series Refactoring `), and 0.15 (:ref:`Index Refactoring `),
-    preserve compatibility with pickles created prior to these versions. However, these must
-    be read with ``pd.read_pickle``, rather than the default python ``pickle.load``.
-    See `this question `__
+    Several internal refactorings have been done while still preserving
+    compatibility with pickles created with older versions of pandas. However,
+    for such cases, pickled DataFrames, Series, etc. must be read with
+    ``pd.read_pickle``, rather than ``pickle.load``.
+
+    See `here `__
+    and `here `__
+    for some examples of compatibility-breaking changes. See
+    `this question `__
+    for a detailed explanation.

.. _io.pickle.compression:

@@ -3150,9 +3139,7 @@ The default is to 'infer
msgpack
-------

-.. versionadded:: 0.13.0
-
-Starting in 0.13.0, pandas is supporting the ``msgpack`` format for
+pandas supports the ``msgpack`` format for
object serialization. This is a lightweight portable binary format, similar
to binary JSON, that is highly space efficient, and provides good performance
both on the writing (serialization), and reading (deserialization).

@@ -3424,10 +3411,6 @@ This is also true for the major axis of a ``Panel``:
Fixed Format
''''''''''''

-.. note::
-
-    This was prior to 0.13.0 the ``Storer`` format.
-
The examples above show storing using ``put``, which writes the HDF5
to ``PyTables`` in a fixed array format, called
the ``fixed`` format. These types of stores are **not** appendable
once written (though you can simply remove them and rewrite).
Nor are they **queryable**; they must be
@@ -3460,8 +3443,6 @@ other sessions.  In addition, delete & query type operations are
supported. This format is specified by ``format='table'`` or ``format='t'``
to ``append`` or ``put`` or ``to_hdf``

-.. versionadded:: 0.13
-
This format can be set as an option as well ``pd.set_option('io.hdf.default_format','table')`` to
enable ``put/append/to_hdf`` to by default store in the ``table`` format.

@@ -3765,9 +3746,7 @@ space. These are in terms of the total number of rows in a table.
Using timedelta64[ns]
+++++++++++++++++++++

-.. versionadded:: 0.13
-
-Beginning in 0.13.0, you can store and query using the ``timedelta64[ns]`` type. Terms can be
+You can store and query using the ``timedelta64[ns]`` type. Terms can be
specified in the format: ``<float>(<unit>)``, where float may be signed (and fractional), and unit can be
``D,s,ms,us,ns`` for the timedelta. Here's an example:

@@ -3889,8 +3868,6 @@ The default is 50,000 rows returned in a chunk.

.. note::

-   .. versionadded:: 0.12.0
-
   You can also use the iterator with ``read_hdf`` which
   will open, then automatically close the store when finished iterating.

@@ -4603,8 +4580,6 @@ included in Python's standard library by default. You can find an overview of
supported drivers for each SQL dialect in the `SQLAlchemy docs
`__.

-.. versionadded:: 0.14.0
-
If SQLAlchemy is not installed, a fallback is only provided for sqlite (and
for mysql for backwards compatibility, but this is deprecated and will be
removed in a future version).

@@ -4937,8 +4912,6 @@ Full documentation can be found `here `__

Stata Format
------------

-.. versionadded:: 0.12.0
-
.. _io.stata_writer:

Writing to Stata format

diff --git a/doc/source/merging.rst b/doc/source/merging.rst
index d956f1ca54e6b..a5ee1b1a9384c 100644
--- a/doc/source/merging.rst
+++ b/doc/source/merging.rst
@@ -1053,8 +1053,6 @@ As you can see, this drops any rows where there was no match.
Joining a single Index to a Multi-index
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-.. versionadded:: 0.14.0
-
You can join a singly-indexed ``DataFrame`` with a level of a multi-indexed ``DataFrame``.
The level will match on the name of the index of the singly-indexed frame against
a level name of the multi-indexed frame.

diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst
index 64a321d67a825..65b411ccd4af2 100644
--- a/doc/source/missing_data.rst
+++ b/doc/source/missing_data.rst
@@ -263,8 +263,6 @@ and ``bfill()`` is equivalent to ``fillna(method='bfill')``
Filling with a PandasObject
~~~~~~~~~~~~~~~~~~~~~~~~~~~

-.. versionadded:: 0.12
-
You can also fillna using a dict or Series that is alignable. The labels of the dict or index of the Series
must match the columns of the frame you wish to fill. The
use case of this is to fill a DataFrame with the mean of that column.

@@ -280,8 +278,6 @@ use case of this is to fill a DataFrame with the mean of that column.
  dff.fillna(dff.mean())
  dff.fillna(dff.mean()['B':'C'])

-.. versionadded:: 0.13
-
Same result as above, but is aligning the 'fill' value which is
a Series in this case.

@@ -320,11 +316,6 @@ examined :ref:`in the API `.
Interpolation
~~~~~~~~~~~~~

-.. versionadded:: 0.13.0
-
-   :meth:`~pandas.DataFrame.interpolate`, and :meth:`~pandas.Series.interpolate` have
-   revamped interpolation methods and functionality.
-
.. versionadded:: 0.17.0

   The ``limit_direction`` keyword argument was added.
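To make the ``limit_direction`` keyword concrete, here is a small sketch; the Series values are invented for illustration:

.. code-block:: python

   import numpy as np
   import pandas as pd

   ser = pd.Series([np.nan, 1.0, np.nan, 3.0, np.nan])

   # the default only fills forward; limit_direction='both' lets the
   # interpolation also fill the leading NaN backward
   ser.interpolate(limit_direction='both')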
diff --git a/doc/source/options.rst b/doc/source/options.rst index 51d02bc89692a..1592caf90546c 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -306,7 +306,7 @@ display.float_format None The callable should accept a fl See core.format.EngFormatter for an example. display.large_repr truncate For DataFrames exceeding max_rows/max_cols, the repr (and HTML repr) can show - a truncated table (the default from 0.13), + a truncated table (the default), or switch to the view from df.info() (the behaviour in earlier versions of pandas). allowable settings, ['truncate', 'info'] diff --git a/doc/source/text.rst b/doc/source/text.rst index e3e4b24d17f44..85b8aa6aa1857 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -211,8 +211,6 @@ Extracting Substrings Extract first match in each subject (extract) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. versionadded:: 0.13.0 - .. warning:: In version 0.18.0, ``extract`` gained the ``expand`` argument. When diff --git a/doc/source/timedeltas.rst b/doc/source/timedeltas.rst index 07effcfdff33b..daa2c262c8c86 100644 --- a/doc/source/timedeltas.rst +++ b/doc/source/timedeltas.rst @@ -242,8 +242,6 @@ Numeric reduction operation for ``timedelta64[ns]`` will return ``Timedelta`` ob Frequency Conversion -------------------- -.. versionadded:: 0.13 - Timedelta Series, ``TimedeltaIndex``, and ``Timedelta`` scalars can be converted to other 'frequencies' by dividing by another timedelta, or by astyping to a specific timedelta type. These operations yield Series and propagate ``NaT`` -> ``nan``. Note that division by the numpy scalar is true division, while astyping is equivalent of floor division. diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index aded5e4402df2..c86c58c3183f6 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -177,7 +177,7 @@ you can pass the ``dayfirst`` flag: .. note:: Specifying a ``format`` argument will potentially speed up the conversion - considerably and on versions later then 0.13.0 explicitly specifying + considerably and explicitly specifying a format string of '%Y%m%d' takes a faster path still. If you pass a single string to ``to_datetime``, it returns single ``Timestamp``. @@ -1946,9 +1946,11 @@ These can easily be converted to a ``PeriodIndex`` Time Zone Handling ------------------ -Pandas provides rich support for working with timestamps in different time zones using ``pytz`` and ``dateutil`` libraries. -``dateutil`` support is new in 0.14.1 and currently only supported for fixed offset and tzfile zones. The default library is ``pytz``. -Support for ``dateutil`` is provided for compatibility with other applications e.g. if you use ``dateutil`` in other python packages. +Pandas provides rich support for working with timestamps in different time +zones using ``pytz`` and ``dateutil`` libraries. ``dateutil`` currently is only +supported for fixed offset and tzfile zones. The default library is ``pytz``. +Support for ``dateutil`` is provided for compatibility with other +applications e.g. if you use ``dateutil`` in other python packages. Working with Time Zones ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index c637246537ca1..839390c8778aa 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -512,8 +512,6 @@ Compare to: Area Plot ~~~~~~~~~ -.. versionadded:: 0.14 - You can create area plots with :meth:`Series.plot.area` and :meth:`DataFrame.plot.area`. 
Area plots are stacked by default. To produce stacked area plot, each column must be either all positive or all negative values. @@ -550,8 +548,6 @@ To produce an unstacked plot, pass ``stacked=False``. Alpha value is set to 0.5 Scatter Plot ~~~~~~~~~~~~ -.. versionadded:: 0.13 - Scatter plot can be drawn by using the :meth:`DataFrame.plot.scatter` method. Scatter plot requires numeric columns for x and y axis. These can be specified by ``x`` and ``y`` keywords each. @@ -619,8 +615,6 @@ See the :meth:`scatter ` method and the Hexagonal Bin Plot ~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14 - You can create hexagonal bin plots with :meth:`DataFrame.plot.hexbin`. Hexbin plots can be a useful alternative to scatter plots if your data are too dense to plot each point individually. @@ -682,8 +676,6 @@ See the :meth:`hexbin ` method and the Pie plot ~~~~~~~~ -.. versionadded:: 0.14 - You can create a pie plot with :meth:`DataFrame.plot.pie` or :meth:`Series.plot.pie`. If your data includes any ``NaN``, they will be automatically filled with 0. A ``ValueError`` will be raised if there are any negative values in your data. @@ -1365,8 +1357,6 @@ Another option is passing an ``ax`` argument to :meth:`Series.plot` to plot on a Plotting With Error Bars ~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14 - Plotting with error bars is now supported in the :meth:`DataFrame.plot` and :meth:`Series.plot` Horizontal and vertical errorbars can be supplied to the ``xerr`` and ``yerr`` keyword arguments to :meth:`~DataFrame.plot()`. The error values can be specified using a variety of formats. @@ -1407,8 +1397,6 @@ Here is an example of one way to easily plot group means with standard deviation Plotting Tables ~~~~~~~~~~~~~~~ -.. versionadded:: 0.14 - Plotting with matplotlib table is now supported in :meth:`DataFrame.plot` and :meth:`Series.plot` with a ``table`` keyword. The ``table`` keyword can accept ``bool``, :class:`DataFrame` or :class:`Series`. The simple way to draw a table is to specify ``table=True``. Data will be transposed to meet matplotlib's default layout. .. ipython:: python @@ -1585,10 +1573,6 @@ available in matplotlib. Although this formatting does not provide the same level of refinement you would get when plotting via pandas, it can be faster when plotting a large number of points. -.. note:: - - The speed up for large data sets only applies to pandas 0.14.0 and later. - .. 
ipython:: python
   :suppress:

From 25d529905521c4710c13b9a2c189a39479c529cb Mon Sep 17 00:00:00 2001
From: s-weigand
Date: Wed, 6 Sep 2017 14:03:39 +0200
Subject: [PATCH 09/57] BUG: Plotting Timedelta on y-axis #16953 (#17430)

* implemented fix for GH issue #16953

* added tests for fix of issue #16953

* changed comments for git issue to pandas style GH#

* changed line length in tests, so all lines are less than 80 characters

* added whatsnew entry

* swapped conversion and filtering of values, for plot to also work with object dtypes

* reformatted code, so len(line) < 80

* changed whatsnew with timedelta and datetime dtypes

* added support for datetimetz and extended tests

* added reason to pytest.mark.xfail
---
 doc/source/whatsnew/v0.21.0.txt     |  2 +-
 pandas/plotting/_core.py            |  8 ++-
 pandas/tests/plotting/test_frame.py | 76 +++++++++++++++++++++++++++++
 3 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 81e52266f972e..1f3bf00c87767 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -432,7 +432,7 @@ I/O
Plotting
^^^^^^^^
- Bug in plotting methods using ``secondary_y`` and ``fontsize`` not setting secondary axis font size (:issue:`12565`)
-
+- Bug when plotting ``timedelta`` and ``datetime`` dtypes on y-axis (:issue:`16953`)

Groupby/Resample/Rolling
^^^^^^^^^^^^^^^^^^^^^^^^

diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index e5b9497993172..a0b7e93efd05c 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -342,7 +342,13 @@ def _compute_plot_data(self):
                 label = 'None'
             data = data.to_frame(name=label)

-        numeric_data = data._convert(datetime=True)._get_numeric_data()
+        # GH16953, _convert is needed as fallback, for ``Series``
+        # with ``dtype == object``
+        data = data._convert(datetime=True, timedelta=True)
+        numeric_data = data.select_dtypes(include=[np.number,
+                                                   "datetime",
+                                                   "datetimetz",
+                                                   "timedelta"])

         try:
             is_empty = numeric_data.empty
diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py
index 67098529a0111..f3b287a8889c3 100644
--- a/pandas/tests/plotting/test_frame.py
+++ b/pandas/tests/plotting/test_frame.py
@@ -380,6 +380,82 @@ def test_subplots_timeseries(self):
                 self._check_ticks_props(ax, xlabelsize=7, xrot=45,
                                         ylabelsize=7)

+    def test_subplots_timeseries_y_axis(self):
+        # GH16953
+        data = {"numeric": np.array([1, 2, 5]),
+                "timedelta": [pd.Timedelta(-10, unit="s"),
+                              pd.Timedelta(10, unit="m"),
+                              pd.Timedelta(10, unit="h")],
+                "datetime_no_tz": [pd.to_datetime("2017-08-01 00:00:00"),
+                                   pd.to_datetime("2017-08-01 02:00:00"),
+                                   pd.to_datetime("2017-08-02 00:00:00")],
+                "datetime_all_tz": [pd.to_datetime("2017-08-01 00:00:00",
+                                                   utc=True),
+                                    pd.to_datetime("2017-08-01 02:00:00",
+                                                   utc=True),
+                                    pd.to_datetime("2017-08-02 00:00:00",
+                                                   utc=True)],
+                "text": ["This", "should", "fail"]}
+        testdata = DataFrame(data)
+
+        ax_numeric = testdata.plot(y="numeric")
+        assert (ax_numeric.get_lines()[0].get_data()[1] ==
+                testdata["numeric"].values).all()
+        ax_timedelta = testdata.plot(y="timedelta")
+        assert (ax_timedelta.get_lines()[0].get_data()[1] ==
+                testdata["timedelta"].values).all()
+        ax_datetime_no_tz = testdata.plot(y="datetime_no_tz")
+        assert (ax_datetime_no_tz.get_lines()[0].get_data()[1] ==
+                testdata["datetime_no_tz"].values).all()
+        ax_datetime_all_tz = testdata.plot(y="datetime_all_tz")
+        assert (ax_datetime_all_tz.get_lines()[0].get_data()[1] ==
+                testdata["datetime_all_tz"].values).all()
+        with pytest.raises(TypeError):
+            testdata.plot(y="text")
+
+    @pytest.mark.xfail(reason='no support for period, categorical, '
+                              'datetime_mixed_tz')
+    def test_subplots_timeseries_y_axis_not_supported(self):
+        """
+        This test will fail for:
+            period:
+                since period isn't yet implemented in ``select_dtypes``
+                and because it will need a custom value converter +
+                tick formatter (as was done for x-axis plots)
+
+            categorical:
+                because it will need a custom value converter +
+                tick formatter (also doesn't work for x-axis, as of now)
+
+            datetime_mixed_tz:
+                because of the way pandas handles ``Series`` of
+                ``datetime`` objects with different timezones,
+                generally converting ``datetime`` objects into a tz-aware
+                form could help with this problem
+        """
+        data = {"numeric": np.array([1, 2, 5]),
+                "period": [pd.Period('2017-08-01 00:00:00', freq='H'),
+                           pd.Period('2017-08-01 02:00', freq='H'),
+                           pd.Period('2017-08-02 00:00:00', freq='H')],
+                "categorical": pd.Categorical(["c", "b", "a"],
+                                              categories=["a", "b", "c"],
+                                              ordered=False),
+                "datetime_mixed_tz": [pd.to_datetime("2017-08-01 00:00:00",
+                                                     utc=True),
+                                      pd.to_datetime("2017-08-01 02:00:00"),
+                                      pd.to_datetime("2017-08-02 00:00:00")]}
+        testdata = pd.DataFrame(data)
+        ax_period = testdata.plot(x="numeric", y="period")
+        assert (ax_period.get_lines()[0].get_data()[1] ==
+                testdata["period"].values).all()
+        ax_categorical = testdata.plot(x="numeric", y="categorical")
+        assert (ax_categorical.get_lines()[0].get_data()[1] ==
+                testdata["categorical"].values).all()
+        ax_datetime_mixed_tz = testdata.plot(x="numeric",
+                                             y="datetime_mixed_tz")
+        assert (ax_datetime_mixed_tz.get_lines()[0].get_data()[1] ==
+                testdata["datetime_mixed_tz"].values).all()
+
    @pytest.mark.slow
    def test_subplots_layout(self):
        # GH 6667

From f7fe4295f84937bc0fa82c9718e62ec19fc36e6a Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Wed, 6 Sep 2017 08:23:06 -0400
Subject: [PATCH 10/57] COMPAT: handle pyarrow deprecation of timestamps_to_ms
 in .from_pandas with pyarrow < 0.6.0 (#17447)

closes #17438
---
 ci/requirements-3.5.sh          |  2 +-
 doc/source/whatsnew/v0.21.0.txt |  2 +-
 pandas/io/parquet.py            | 18 ++++++++++++++----
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/ci/requirements-3.5.sh b/ci/requirements-3.5.sh
index 33db9c28c78a9..d694ad3679ac1 100644
--- a/ci/requirements-3.5.sh
+++ b/ci/requirements-3.5.sh
@@ -8,4 +8,4 @@ echo "install 35"
conda remove -n pandas python-dateutil --force
pip install python-dateutil

-conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1
+conda install -n pandas -c conda-forge feather-format pyarrow=0.5.0
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 1f3bf00c87767..b24a6f067cee4 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -125,7 +125,7 @@ Other Enhancements
- :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`)
- :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`)
- :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`)
-- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `.
+- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `.
(:issue:`15838`, :issue:`17438`) - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`) - `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`). - :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`). diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 09603fd6fdcce..4b507b7f5df6f 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -58,13 +58,23 @@ def __init__(self): "\nor via pip\n" "pip install -U pyarrow\n") + self._pyarrow_lt_050 = LooseVersion(pyarrow.__version__) < '0.5.0' + self._pyarrow_lt_060 = LooseVersion(pyarrow.__version__) < '0.6.0' self.api = pyarrow - def write(self, df, path, compression='snappy', **kwargs): + def write(self, df, path, compression='snappy', + coerce_timestamps='ms', **kwargs): path, _, _ = get_filepath_or_buffer(path) - table = self.api.Table.from_pandas(df, timestamps_to_ms=True) - self.api.parquet.write_table( - table, path, compression=compression, **kwargs) + if self._pyarrow_lt_060: + table = self.api.Table.from_pandas(df, timestamps_to_ms=True) + self.api.parquet.write_table( + table, path, compression=compression, **kwargs) + + else: + table = self.api.Table.from_pandas(df) + self.api.parquet.write_table( + table, path, compression=compression, + coerce_timestamps=coerce_timestamps, **kwargs) def read(self, path): path, _, _ = get_filepath_or_buffer(path) From d4577911c750f2f48f760ce451d413116bed72da Mon Sep 17 00:00:00 2001 From: topper-123 Date: Wed, 6 Sep 2017 15:55:12 +0100 Subject: [PATCH 11/57] DOC/TST: Add examples to MultiIndex.get_level_values + related changes (#17414) --- pandas/core/indexes/base.py | 12 ++++++++++-- pandas/core/indexes/multi.py | 23 +++++++++++++++++++++-- pandas/tests/indexes/test_base.py | 6 ++++++ 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6a30eaefaaae7..a9098126a38e3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2529,15 +2529,23 @@ def set_value(self, arr, key, value): def _get_level_values(self, level): """ Return an Index of values for requested level, equal to the length - of the index + of the index. Parameters ---------- - level : int + level : int or str + ``level`` is either the integer position of the level in the + MultiIndex, or the name of the level. Returns ------- values : Index + ``self``, as there is only one level in the Index. + + See also + --------- + pandas.MultiIndex.get_level_values : get values for a level of a + MultiIndex """ self._validate_index_level(level) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d7d5b6d128a2c..8b2cf0e7c0b40 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -882,15 +882,34 @@ def _get_level_values(self, level): def get_level_values(self, level): """ Return vector of label values for requested level, - equal to the length of the index + equal to the length of the index. Parameters ---------- - level : int or level name + level : int or str + ``level`` is either the integer position of the level in the + MultiIndex, or the name of the level. Returns ------- values : Index + ``values`` is a level of this MultiIndex converted to + a single :class:`Index` (or subclass thereof). 
+ + Examples + --------- + + Create a MultiIndex: + + >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def'))) + >>> mi.names = ['level_1', 'level_2'] + + Get level values by supplying level as either integer or name: + + >>> mi.get_level_values(0) + Index(['a', 'b', 'c'], dtype='object', name='level_1') + >>> mi.get_level_values('level_2') + Index(['d', 'e', 'f'], dtype='object', name='level_2') """ level = self._get_level_number(level) values = self._get_level_values(level) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index aa32e75ba0d58..f96dbdcfb8acf 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1438,6 +1438,12 @@ def test_get_level_values(self): result = self.strIndex.get_level_values(0) tm.assert_index_equal(result, self.strIndex) + # test for name (GH 17414) + index_with_name = self.strIndex.copy() + index_with_name.name = 'a' + result = index_with_name.get_level_values('a') + tm.assert_index_equal(result, index_with_name) + def test_slice_keep_name(self): idx = Index(['a', 'b'], name='asdf') assert idx.name == idx[1:].name From b8694460dbe3d4d82adb757a37e5f515356b1cde Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 6 Sep 2017 17:14:05 -0700 Subject: [PATCH 12/57] Dont re-pin total_seconds as it is already implemented (#17432) --- pandas/_libs/tslib.pyx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 8fbc606ccdfe2..962c2ef3956a1 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -856,6 +856,9 @@ class NaTType(_NaT): return (__nat_unpickle, (None, )) def total_seconds(self): + """ + Total duration of timedelta in seconds (to ns precision) + """ # GH 10939 return np.nan @@ -3890,8 +3893,9 @@ for field in fields: _nat_methods = ['date', 'now', 'replace', 'to_pydatetime', 'today', 'round', 'floor', 'ceil', 'tz_convert', 'tz_localize'] -_nan_methods = ['weekday', 'isoweekday', 'total_seconds'] -_implemented_methods = ['to_datetime', 'to_datetime64', 'isoformat'] +_nan_methods = ['weekday', 'isoweekday'] +_implemented_methods = [ + 'to_datetime', 'to_datetime64', 'isoformat', 'total_seconds'] _implemented_methods.extend(_nat_methods) _implemented_methods.extend(_nan_methods) From 3a12687c4e91501d805fc71c37e9ce0a496b48bf Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 6 Sep 2017 17:46:50 -0700 Subject: [PATCH 13/57] BUG: Return local Timestamp.weekday_name attribute (#17354) (#17377) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/_libs/tslib.pyx | 22 ++++++++++++++++++---- pandas/tests/scalar/test_timestamp.py | 8 ++++++++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index b24a6f067cee4..553e622b8560e 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -399,6 +399,7 @@ Conversion - Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. Previously returned a ``numpy.bool_``. 
(:issue:`17237`) - Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`) - Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`) +- Bug in :attr:`Timestamp.weekday_name` returning a UTC-based weekday name when localized to a timezone (:issue:`17354`) Indexing ^^^^^^^^ diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 962c2ef3956a1..f31be9502499f 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -532,9 +532,7 @@ class Timestamp(_Timestamp): @property def weekday_name(self): - out = get_date_name_field( - np.array([self.value], dtype=np.int64), 'weekday_name') - return out[0] + return self._get_named_field('weekday_name') @property def dayofyear(self): @@ -1269,13 +1267,29 @@ cdef class _Timestamp(datetime): # same timezone if specified) return datetime.__sub__(self, other) - cpdef _get_field(self, field): + cdef int64_t _maybe_convert_value_to_local(self): + """Convert UTC i8 value to local i8 value if tz exists""" + cdef: + int64_t val val = self.value if self.tz is not None and not _is_utc(self.tz): val = tz_convert_single(self.value, 'UTC', self.tz) + return val + + cpdef _get_field(self, field): + cdef: + int64_t val + val = self._maybe_convert_value_to_local() out = get_date_field(np.array([val], dtype=np.int64), field) return int(out[0]) + cpdef _get_named_field(self, field): + cdef: + int64_t val + val = self._maybe_convert_value_to_local() + out = get_date_name_field(np.array([val], dtype=np.int64), field) + return out[0] + cpdef _get_start_end_field(self, field): month_kw = self.freq.kwds.get( 'startingMonth', self.freq.kwds.get( diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 7cd1a7db0f9fe..8d47ce4802ac6 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -555,6 +555,14 @@ def check(value, equal): for end in ends: assert getattr(ts, end) + @pytest.mark.parametrize('data, expected', + [(Timestamp('2017-08-28 23:00:00'), 'Monday'), + (Timestamp('2017-08-28 23:00:00', tz='EST'), + 'Monday')]) + def test_weekday_name(self, data, expected): + # GH 17354 + assert data.weekday_name == expected + def test_pprint(self): # GH12622 import pprint From fd137f537051ad98ca9a9c069827c72a8b9e7543 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Thu, 7 Sep 2017 02:47:43 +0200 Subject: [PATCH 14/57] BUG: intersection of decreasing RangeIndexes (#17374) closes #17296 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/range.py | 22 +++++++++++++--------- pandas/tests/indexes/test_range.py | 15 +++++++++++++++ 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 553e622b8560e..f7cd8230c8b9b 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -417,6 +417,7 @@ Indexing - Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`) - Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) - Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`) +- Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`) I/O ^^^ diff --git a/pandas/core/indexes/range.py 
b/pandas/core/indexes/range.py index 82412d3a7ef57..b759abaed4e56 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -324,12 +324,13 @@ def intersection(self, other): if not len(self) or not len(other): return RangeIndex._simple_new(None) + first = self[::-1] if self._step < 0 else self + second = other[::-1] if other._step < 0 else other + # check whether intervals intersect # deals with in- and decreasing ranges - int_low = max(min(self._start, self._stop + 1), - min(other._start, other._stop + 1)) - int_high = min(max(self._stop, self._start + 1), - max(other._stop, other._start + 1)) + int_low = max(first._start, second._start) + int_high = min(first._stop, second._stop) if int_high <= int_low: return RangeIndex._simple_new(None) @@ -337,21 +338,24 @@ def intersection(self, other): # solve intersection problem # performance hint: for identical step sizes, could use # cheaper alternative - gcd, s, t = self._extended_gcd(self._step, other._step) + gcd, s, t = first._extended_gcd(first._step, second._step) # check whether element sets intersect - if (self._start - other._start) % gcd: + if (first._start - second._start) % gcd: return RangeIndex._simple_new(None) # calculate parameters for the RangeIndex describing the # intersection disregarding the lower bounds - tmp_start = self._start + (other._start - self._start) * \ - self._step // gcd * s - new_step = self._step * other._step // gcd + tmp_start = first._start + (second._start - first._start) * \ + first._step // gcd * s + new_step = first._step * second._step // gcd new_index = RangeIndex(tmp_start, int_high, new_step, fastpath=True) # adjust index to limiting interval new_index._start = new_index._min_fitting_element(int_low) + + if (self._step < 0 and other._step < 0) is not (new_index._step < 0): + new_index = new_index[::-1] return new_index def _min_fitting_element(self, lower_limit): diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 5ecf467b57fc5..06c8f0ee392c7 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -610,6 +610,21 @@ def test_intersection(self): other.values))) tm.assert_index_equal(result, expected) + # reversed (GH 17296) + result = other.intersection(self.index) + tm.assert_index_equal(result, expected) + + # GH 17296: intersect two decreasing RangeIndexes + first = RangeIndex(10, -2, -2) + other = RangeIndex(5, -4, -1) + expected = first.astype(int).intersection(other.astype(int)) + result = first.intersection(other).astype(int) + tm.assert_index_equal(result, expected) + + # reversed + result = other.intersection(first).astype(int) + tm.assert_index_equal(result, expected) + index = RangeIndex(5) # intersect of non-overlapping indices From 93e23a71f583920b46b4bb20e99a9a5e73685c47 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 6 Sep 2017 17:51:50 -0700 Subject: [PATCH 15/57] Remove property that re-computed microsecond (#17331) --- asv_bench/benchmarks/timestamp.py | 60 +++++++++++++++++++++++++++++++ doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/_libs/period.pyx | 1 + pandas/_libs/tslib.pyx | 4 --- 4 files changed, 62 insertions(+), 5 deletions(-) create mode 100644 asv_bench/benchmarks/timestamp.py diff --git a/asv_bench/benchmarks/timestamp.py b/asv_bench/benchmarks/timestamp.py new file mode 100644 index 0000000000000..066479b22739a --- /dev/null +++ b/asv_bench/benchmarks/timestamp.py @@ -0,0 +1,60 @@ +from .pandas_vb_common import * +from pandas import to_timedelta, Timestamp + + +class 
TimestampProperties(object):
+    goal_time = 0.2
+
+    def setup(self):
+        self.ts = Timestamp('2017-08-25 08:16:14')
+
+    def time_tz(self):
+        self.ts.tz
+
+    def time_offset(self):
+        self.ts.offset
+
+    def time_dayofweek(self):
+        self.ts.dayofweek
+
+    def time_weekday_name(self):
+        self.ts.weekday_name
+
+    def time_dayofyear(self):
+        self.ts.dayofyear
+
+    def time_week(self):
+        self.ts.week
+
+    def time_quarter(self):
+        self.ts.quarter
+
+    def time_days_in_month(self):
+        self.ts.days_in_month
+
+    def time_freqstr(self):
+        self.ts.freqstr
+
+    def time_is_month_start(self):
+        self.ts.is_month_start
+
+    def time_is_month_end(self):
+        self.ts.is_month_end
+
+    def time_is_quarter_start(self):
+        self.ts.is_quarter_start
+
+    def time_is_quarter_end(self):
+        self.ts.is_quarter_end
+
+    def time_is_year_start(self):
+        self.ts.is_year_start
+
+    def time_is_year_end(self):
+        self.ts.is_year_end
+
+    def time_is_leap_year(self):
+        self.ts.is_leap_year
+
+    def time_microsecond(self):
+        self.ts.microsecond
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index f7cd8230c8b9b..33a6db18db3ca 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -382,7 +382,7 @@ Performance Improvements
- Improved performance of instantiating :class:`SparseDataFrame` (:issue:`16773`)
- :attr:`Series.dt` no longer performs frequency inference, yielding a large speedup when accessing the attribute (:issue:`17210`)
-
+- :attr:`Timestamp.microsecond` no longer re-computes on attribute access (:issue:`17331`)

.. _whatsnew_0210.bug_fixes:

diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx
index 816b7ebfff86d..0ade8f9a6dde5 100644
--- a/pandas/_libs/period.pyx
+++ b/pandas/_libs/period.pyx
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 from datetime import datetime, date, timedelta
 import operator

diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index f31be9502499f..a7b33c669a8b8 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -544,10 +544,6 @@ class Timestamp(_Timestamp):

     weekofyear = week

-    @property
-    def microsecond(self):
-        return self._get_field('us')
-
     @property
     def quarter(self):
         return self._get_field('q')

From 20fee85ede7f2052f855b8f3445cd1ffc17ee0c3 Mon Sep 17 00:00:00 2001
From: topper-123
Date: Thu, 7 Sep 2017 02:00:49 +0100
Subject: [PATCH 16/57] cleaned references to pandas v0.15 and v0.16 in docs
 (#17442)

---
 doc/source/10min.rst             |  2 +-
 doc/source/advanced.rst          | 15 ++------
 doc/source/basics.rst            |  8 ++--
 doc/source/categorical.rst       | 64 +++++---------------------------
 doc/source/comparison_with_r.rst |  2 -
 doc/source/computation.rst       |  7 +---
 doc/source/cookbook.rst          |  6 ---
 doc/source/dsintro.rst           |  2 -
 doc/source/gotchas.rst           |  4 +-
 doc/source/indexing.rst          | 14 -------
 doc/source/install.rst           | 20 ++++------
 doc/source/io.rst                | 41 ++++-----------------
 doc/source/remote_data.rst       | 11 +++---
 doc/source/reshaping.rst         |  4 +-
 doc/source/sparse.rst            |  2 -
 doc/source/timedeltas.rst        | 26 +++++--------
 doc/source/timeseries.rst        | 10 +++---
 doc/source/visualization.rst     | 16 ---------
 18 files changed, 53 insertions(+), 180 deletions(-)

diff --git a/doc/source/10min.rst b/doc/source/10min.rst
index def49a641a0ff..ef6b2d6ef2c90 100644
--- a/doc/source/10min.rst
+++ b/doc/source/10min.rst
@@ -655,7 +655,7 @@ the quarter end:
Categoricals
------------

-Since version 0.15, pandas can include categorical data in a ``DataFrame``. For full docs, see the
+pandas can include categorical data in a ``DataFrame``. For full docs, see the
:ref:`categorical introduction ` and the :ref:`API documentation `.

.. ipython:: python

diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst
index 4af476cd5a7e1..3f145cf955664 100644
--- a/doc/source/advanced.rst
+++ b/doc/source/advanced.rst
@@ -26,12 +26,6 @@ See the :ref:`Indexing and Selecting Data ` for general indexing docum
    should be avoided. See :ref:`Returning a View versus Copy `

-.. warning::
-
-   In 0.15.0 ``Index`` has internally been refactored to no longer sub-class ``ndarray``
-   but instead subclass ``PandasObject``, similarly to the rest of the pandas objects. This should be
-   a transparent change with only very limited API implications (See the :ref:`Internal Refactoring `)
-
See the :ref:`cookbook` for some advanced strategies

.. _advanced.hierarchical:

@@ -638,12 +632,9 @@ In the following sub-sections we will highlight some other index types.
CategoricalIndex
~~~~~~~~~~~~~~~~

-.. versionadded:: 0.16.1
-
-We introduce a ``CategoricalIndex``, a new type of index object that is useful for supporting
-indexing with duplicates. This is a container around a ``Categorical`` (introduced in v0.15.0)
-and allows efficient indexing and storage of an index with a large number of duplicated elements. Prior to 0.16.1,
-setting the index of a ``DataFrame/Series`` with a ``category`` dtype would convert this to regular object-based ``Index``.
+``CategoricalIndex`` is a type of index that is useful for supporting
+indexing with duplicates. This is a container around a ``Categorical``
+and allows efficient indexing and storage of an index with a large number of duplicated elements.

.. ipython:: python

diff --git a/doc/source/basics.rst b/doc/source/basics.rst
index 35eb14eda238f..42c28df3a6030 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -719,8 +719,6 @@ on an entire ``DataFrame`` or ``Series``, row- or column-wise, or elementwise.
Tablewise Function Application
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-.. versionadded:: 0.16.2
-
``DataFrames`` and ``Series`` can of course just be passed into functions.
However, if the function needs to be called in a chain, consider using
the :meth:`~DataFrame.pipe` method. Compare the following

@@ -1860,8 +1858,10 @@ dtypes
------

The main types stored in pandas objects are ``float``, ``int``, ``bool``,
-``datetime64[ns]`` and ``datetime64[ns, tz]`` (in >= 0.17.0), ``timedelta[ns]``, ``category`` (in >= 0.15.0), and ``object``. In addition these dtypes
-have item sizes, e.g. ``int64`` and ``int32``. See :ref:`Series with TZ ` for more detail on ``datetime64[ns, tz]`` dtypes.
+``datetime64[ns]`` and ``datetime64[ns, tz]`` (in >= 0.17.0), ``timedelta[ns]``,
+``category`` and ``object``. In addition these dtypes have item sizes, e.g.
+``int64`` and ``int32``. See :ref:`Series with TZ `
+for more detail on ``datetime64[ns, tz]`` dtypes.

A convenient :attr:`~DataFrame.dtypes` attribute for DataFrames returns a Series
with the data type of each column.

diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst
index 02d7920bc4a84..8835c4a1533d0 100644
--- a/doc/source/categorical.rst
+++ b/doc/source/categorical.rst
@@ -16,13 +16,6 @@
Categorical Data
****************

-.. versionadded:: 0.15
-
-.. note::
-    While there was `pandas.Categorical` in earlier versions, the ability to use
-    categorical data in `Series` and `DataFrame` is new.
-
-
This is an introduction to pandas categorical data type, including a short comparison
with R's ``factor``.

@@ -295,10 +288,6 @@ Sorting and Order
.. _categorical.sort:

-.. warning::
-
-   The default for construction has changed in v0.16.0 to ``ordered=False``, from the prior implicit ``ordered=True``
-
If categorical data is ordered (``s.cat.ordered == True``), then the order of the categories has a
meaning and certain operations are possible. If the categorical is unordered, ``.min()/.max()`` will raise a `TypeError`.

@@ -803,13 +792,11 @@ Following table summarizes the results of ``Categoricals`` related concatenation
Getting Data In/Out
-------------------

-.. versionadded:: 0.15.2
+You can write data that contains ``category`` dtypes to a ``HDFStore``.
+See :ref:`here ` for an example and caveats.

-Writing data (`Series`, `Frames`) to a HDF store that contains a ``category`` dtype was implemented
-in 0.15.2. See :ref:`here ` for an example and caveats.
-
-Writing data to and reading data from *Stata* format files was implemented in
-0.15.2. See :ref:`here ` for an example and caveats.
+It is also possible to write data to and read data from *Stata* format files.
+See :ref:`here ` for an example and caveats.

Writing to a CSV file will convert the data, effectively removing any information about the
categorical (categories and ordering). So if you read back the CSV file you have to convert the

@@ -928,32 +915,6 @@ an ``object`` dtype is a constant times the length of the data.

   s.astype('category').nbytes

-Old style constructor usage
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-In earlier versions than pandas 0.15, a `Categorical` could be constructed by passing in precomputed
-`codes` (called then `labels`) instead of values with categories. The `codes` were interpreted as
-pointers to the categories with `-1` as `NaN`. This type of constructor usage is replaced by
-the special constructor :func:`Categorical.from_codes`.
-
-Unfortunately, in some special cases, using code which assumes the old style constructor usage
-will work with the current pandas version, resulting in subtle bugs:
-
-.. code-block:: python
-
-    >>> cat = pd.Categorical([1,2], [1,2,3])
-    >>> # old version
-    >>> cat.get_values()
-    array([2, 3], dtype=int64)
-    >>> # new version
-    >>> cat.get_values()
-    array([1, 2], dtype=int64)
-
-.. warning::
-    If you used `Categoricals` with older versions of pandas, please audit your code before
-    upgrading and change your code to use the :func:`~pandas.Categorical.from_codes`
-    constructor.
-
`Categorical` is not a `numpy` array
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

@@ -982,8 +943,7 @@ Dtype comparisons work:

   dtype == np.str_
   np.str_ == dtype

-To check if a Series contains Categorical data, with pandas 0.16 or later, use
-``hasattr(s, 'cat')``:
+To check if a Series contains Categorical data, use ``hasattr(s, 'cat')``:

.. ipython:: python

@@ -1023,13 +983,13 @@ basic type) and applying along columns will also convert to object.
Categorical Index
~~~~~~~~~~~~~~~~~

-.. versionadded:: 0.16.1
-
-A new ``CategoricalIndex`` index type is introduced in version 0.16.1. See the
-:ref:`advanced indexing docs ` for a more detailed
+``CategoricalIndex`` is a type of index that is useful for supporting
+indexing with duplicates. This is a container around a ``Categorical``
+and allows efficient indexing and storage of an index with a large number of duplicated elements.
+See the :ref:`advanced indexing docs ` for a more detailed
explanation.

-Setting the index, will create create a ``CategoricalIndex``
+Setting the index will create a ``CategoricalIndex``

.. 
ipython:: python @@ -1041,10 +1001,6 @@ Setting the index, will create create a ``CategoricalIndex`` # This now sorts by the categories order df.sort_index() -In previous versions (<0.16.1) there is no index of type ``category``, so -setting the index to categorical column will convert the categorical data to a -"normal" dtype first and therefore remove any custom ordering of the categories. - Side Effects ~~~~~~~~~~~~ diff --git a/doc/source/comparison_with_r.rst b/doc/source/comparison_with_r.rst index f895cdc25e620..eb97aeeb7e696 100644 --- a/doc/source/comparison_with_r.rst +++ b/doc/source/comparison_with_r.rst @@ -505,8 +505,6 @@ For more details and examples see :ref:`the reshaping documentation |factor|_ ~~~~~~~~~ -.. versionadded:: 0.15 - pandas has a data type for categorical data. .. code-block:: r diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 76a030d355e33..23699393958cf 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -924,15 +924,12 @@ EWM has a ``min_periods`` argument, which has the same meaning it does for all the ``.expanding`` and ``.rolling`` methods: no output values will be set until at least ``min_periods`` non-null values are encountered in the (expanding) window. -(This is a change from versions prior to 0.15.0, in which the ``min_periods`` -argument affected only the ``min_periods`` consecutive entries starting at the -first non-null value.) -EWM also has an ``ignore_na`` argument, which deterines how +EWM also has an ``ignore_na`` argument, which determines how intermediate null values affect the calculation of the weights. When ``ignore_na=False`` (the default), weights are calculated based on absolute positions, so that intermediate null values affect the result. -When ``ignore_na=True`` (which reproduces the behavior in versions prior to 0.15.0), +When ``ignore_na=True``, weights are calculated by ignoring intermediate null values. For example, assuming ``adjust=True``, if ``ignore_na=False``, the weighted average of ``3, NaN, 5`` would be calculated as diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index f51c3e679b36f..5bb3ba75fe51b 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -256,12 +256,6 @@ Panels pf = pd.Panel({'df1':df1,'df2':df2,'df3':df3});pf - #Assignment using Transpose (pandas < 0.15) - pf = pf.transpose(2,0,1) - pf['E'] = pd.DataFrame(data, rng, cols) - pf = pf.transpose(1,2,0);pf - - #Direct assignment (pandas > 0.15) pf.loc[:,:,'F'] = pd.DataFrame(data, rng, cols);pf `Mask a panel by using np.where and then reconstructing the panel with the new masked values diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 4652ccbf0ad34..ec0a1c7a00bf7 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -453,8 +453,6 @@ available to insert at a particular location in the columns: Assigning New Columns in Method Chains ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.16.0 - Inspired by `dplyr's `__ ``mutate`` verb, DataFrame has an :meth:`~pandas.DataFrame.assign` diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst index a3062b4086673..9e6f98923fca6 100644 --- a/doc/source/gotchas.rst +++ b/doc/source/gotchas.rst @@ -22,8 +22,8 @@ Frequently Asked Questions (FAQ) DataFrame memory usage ---------------------- -As of pandas version 0.15.0, the memory usage of a dataframe (including -the index) is shown when accessing the ``info`` method of a dataframe. 
A +The memory usage of a dataframe (including the index) +is shown when accessing the ``info`` method of a dataframe. A configuration option, ``display.memory_usage`` (see :ref:`options`), specifies if the dataframe's memory usage will be displayed when invoking the ``df.info()`` method. diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index a6e7df57be4e5..88e62b5d301a3 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -47,12 +47,6 @@ advanced indexing. should be avoided. See :ref:`Returning a View versus Copy ` -.. warning:: - - In 0.15.0 ``Index`` has internally been refactored to no longer subclass ``ndarray`` - but instead subclass ``PandasObject``, similarly to the rest of the pandas objects. This should be - a transparent change with only very limited API implications (See the :ref:`Internal Refactoring `) - .. warning:: Indexing on an integer-based Index with floats has been clarified in 0.18.0, for a summary of the changes, see :ref:`here `. @@ -660,7 +654,6 @@ For getting *multiple* indexers, using ``.get_indexer`` Selecting Random Samples ------------------------ -.. versionadded::0.16.1 A random selection of rows or columns from a Series, DataFrame, or Panel with the :meth:`~DataFrame.sample` method. The method will sample rows by default, and accepts a specific number of rows/columns to return, or a fraction of rows. @@ -1510,8 +1503,6 @@ See :ref:`Advanced Indexing ` for usage of MultiIndexes. ind.name = "bob" ind -.. versionadded:: 0.15.0 - ``set_names``, ``set_levels``, and ``set_labels`` also take an optional `level`` argument @@ -1527,11 +1518,6 @@ Set operations on Index objects .. _indexing.set_ops: -.. warning:: - - In 0.15.0. the set operations ``+`` and ``-`` were deprecated in order to provide these for numeric type operations on certain - index types. ``+`` can be replace by ``.union()`` or ``|``, and ``-`` by ``.difference()``. - The two main operations are ``union (|)``, ``intersection (&)`` These can be directly called as instance methods or used via overloaded operators. Difference is provided via the ``.difference()`` method. diff --git a/doc/source/install.rst b/doc/source/install.rst index 8dc8224ea6cb2..c805f84d0faaa 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -18,7 +18,7 @@ Instructions for installing from source, Python version support ---------------------- -Officially Python 2.7, 3.4, 3.5, and 3.6 +Officially Python 2.7, 3.5, and 3.6. Installing pandas ----------------- @@ -183,21 +183,17 @@ installed), make sure you have `pytest >>> import pandas as pd >>> pd.test() - Running unit tests for pandas - pandas version 0.18.0 - numpy version 1.10.2 - pandas is installed in pandas - Python version 2.7.11 |Continuum Analytics, Inc.| - (default, Dec 6 2015, 18:57:58) [GCC 4.2.1 (Apple Inc. build 5577)] - nose version 1.3.7 + running: pytest --skip-slow --skip-network C:\Users\TP\Anaconda3\envs\py36\lib\site-packages\pandas + ============================= test session starts ============================= + platform win32 -- Python 3.6.2, pytest-3.2.1, py-1.4.34, pluggy-0.4.0 + rootdir: C:\Users\TP\Documents\Python\pandasdev\pandas, inifile: setup.cfg + collected 12145 items / 3 skipped + ..................................................................S...... ........S................................................................ ......................................................................... 
- ---------------------------------------------------------------------- - Ran 9252 tests in 368.339s - - OK (SKIP=117) + ==================== 12130 passed, 12 skipped in 368.339 seconds ===================== Dependencies ------------ diff --git a/doc/source/io.rst b/doc/source/io.rst index 33523ea171f3a..de3150035c446 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -592,8 +592,7 @@ Ignoring line comments and empty lines ++++++++++++++++++++++++++++++++++++++ If the ``comment`` parameter is specified, then completely commented lines will -be ignored. By default, completely blank lines will be ignored as well. Both of -these are API changes introduced in version 0.15. +be ignored. By default, completely blank lines will be ignored as well. .. ipython:: python @@ -2701,8 +2700,6 @@ Using a list to get multiple sheets: # Returns the 1st and 4th sheet, as a dictionary of DataFrames. read_excel('path_to_file.xls',sheet_name=['Sheet1',3]) -.. versionadded:: 0.16 - ``read_excel`` can read more than one sheet, by setting ``sheet_name`` to either a list of sheet names, a list of sheet positions, or ``None`` to read all sheets. Sheets can be specified by sheet index or sheet name, using an integer or string, @@ -3241,11 +3238,10 @@ for some advanced strategies .. warning:: - As of version 0.15.0, pandas requires ``PyTables`` >= 3.0.0. Stores written with prior versions of pandas / ``PyTables`` >= 2.3 are fully compatible (this was the previous minimum ``PyTables`` required version). - -.. warning:: - - There is a ``PyTables`` indexing bug which may appear when querying stores using an index. If you see a subset of results being returned, upgrade to ``PyTables`` >= 3.2. Stores created previously will need to be rewritten using the updated version. + pandas requires ``PyTables`` >= 3.0.0. + There is an indexing bug in ``PyTables`` < 3.2 which may appear when querying stores using an index. + If you see a subset of results being returned, upgrade to ``PyTables`` >= 3.2. + Stores created previously will need to be rewritten using the updated version. .. warning:: @@ -4210,10 +4206,8 @@ object : ``strings`` ``np.nan`` Categorical Data ++++++++++++++++ -.. versionadded:: 0.15.2 - -Writing data to a ``HDFStore`` that contains a ``category`` dtype was implemented -in 0.15.2. Queries work the same as if it was an object array. However, the ``category`` dtyped data is +You can write data that contains ``category`` dtypes to a ``HDFStore``. +Queries work the same as if it were an object array. However, the ``category`` dtyped data is stored in a more efficient manner. .. ipython:: python @@ -4228,21 +4222,6 @@ stored in a more efficient manner. result result.dtypes -.. warning:: - - The format of the ``Categorical`` is readable by prior versions of pandas (< 0.15.2), but will retrieve - the data as an integer based column (e.g. the ``codes``). However, the ``categories`` *can* be retrieved - but require the user to select them manually using the explicit meta path. - - The data is stored like so: - - .. ipython:: python - - cstore - - # to get the categories - cstore.select('dfcat/meta/A/meta') - .. ipython:: python :suppress: :okexcept: @@ -4746,8 +4725,6 @@ You can check if a table exists using :func:`~pandas.io.sql.has_table` Schema support '''''''''''''' -.. versionadded:: 0.15.0 - Reading from and writing to different schema's is supported through the ``schema`` keyword in the :func:`~pandas.read_sql_table` and :func:`~pandas.DataFrame.to_sql` functions. 
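For example (a sketch only -- the SQLAlchemy engine URL and the table and schema names below are made up for illustration):

.. code-block:: python

   from sqlalchemy import create_engine

   engine = create_engine('postgresql://user:password@localhost:5432/mydb')

   # write df to a table inside a non-default schema
   df.to_sql('my_table', engine, schema='analytics')

   # read the same table back from that schema
   pd.read_sql_table('my_table', engine, schema='analytics')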
Note however that this depends on the database flavor (sqlite does not @@ -4975,8 +4952,6 @@ be used to read the file incrementally. pd.read_stata('stata.dta') -.. versionadded:: 0.16.0 - Specifying a ``chunksize`` yields a :class:`~pandas.io.stata.StataReader` instance that can be used to read ``chunksize`` lines from the file at a time. The ``StataReader`` @@ -5034,8 +5009,6 @@ values will have ``object`` data type. Categorical Data ++++++++++++++++ -.. versionadded:: 0.15.2 - ``Categorical`` data can be exported to *Stata* data files as value labeled data. The exported data consists of the underlying category codes as integer data values and the categories as value labels. *Stata* does not have an explicit equivalent diff --git a/doc/source/remote_data.rst b/doc/source/remote_data.rst index 7980133582125..9af66058a7aaa 100644 --- a/doc/source/remote_data.rst +++ b/doc/source/remote_data.rst @@ -11,14 +11,13 @@ Remote Data Access DataReader ---------- -The sub-package ``pandas.io.data`` is removed in favor of a separately -installable `pandas-datareader package +The sub-package ``pandas.io.data`` was deprecated in v.0.17 and removed in +`v.0.19 `__. + Instead there is now a separately installable `pandas-datareader package `_. This will allow the data -modules to be independently updated to your pandas installation. The API for -``pandas-datareader v0.1.1`` is the same as in ``pandas v0.16.1``. -(:issue:`8961`) +modules to be updated independently of your pandas installation. - You should replace the imports of the following: + For code written for pandas < 0.19, you should replace the following imports: .. code-block:: python diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index 3dce73b302c7c..fab83222b313f 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -569,8 +569,6 @@ This function is often used along with discretization functions like ``cut``: See also :func:`Series.str.get_dummies `. -.. versionadded:: 0.15.0 - :func:`get_dummies` also accepts a DataFrame. By default all categorical variables (categorical in the statistical sense, those with `object` or `categorical` dtype) are encoded as dummy variables. @@ -675,4 +673,4 @@ handling of NaN: you can use ``df["cat_col"] = pd.Categorical(df["col"])`` or ``df["cat_col"] = df["col"].astype("category")``. For full docs on :class:`~pandas.Categorical`, see the :ref:`Categorical introduction ` and the - :ref:`API documentation `. This feature was introduced in version 0.15. + :ref:`API documentation `. diff --git a/doc/source/sparse.rst b/doc/source/sparse.rst index b4884cf1c4141..cf16cee501a3e 100644 --- a/doc/source/sparse.rst +++ b/doc/source/sparse.rst @@ -216,8 +216,6 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you SparseSeries ~~~~~~~~~~~~ -.. versionadded:: 0.16.0 - A :meth:`SparseSeries.to_coo` method is implemented for transforming a ``SparseSeries`` indexed by a ``MultiIndex`` to a ``scipy.sparse.coo_matrix``. The method requires a ``MultiIndex`` with two or more levels. diff --git a/doc/source/timedeltas.rst b/doc/source/timedeltas.rst index daa2c262c8c86..d055c49dc4721 100644 --- a/doc/source/timedeltas.rst +++ b/doc/source/timedeltas.rst @@ -23,13 +23,12 @@ Time Deltas *********** -.. 
note:: - - Starting in v0.15.0, we introduce a new scalar type ``Timedelta``, which is a subclass of ``datetime.timedelta``, and behaves in a similar manner, - but allows compatibility with ``np.timedelta64`` types as well as a host of custom representation, parsing, and attributes. +Timedeltas are differences in times, expressed in different units, e.g. days, hours, minutes, +seconds. They can be both positive and negative. -Timedeltas are differences in times, expressed in difference units, e.g. days, hours, minutes, seconds. -They can be both positive and negative. +``Timedelta`` is a subclass of ``datetime.timedelta``, and behaves in a similar manner, +but allows compatibility with ``np.timedelta64`` types as well as a host of custom representation, +parsing, and attributes. Parsing ------- @@ -78,15 +77,10 @@ Further, operations among the scalars yield another scalar ``Timedelta``. to_timedelta ~~~~~~~~~~~~ -.. warning:: - - Prior to 0.15.0 ``pd.to_timedelta`` would return a ``Series`` for list-like/Series input, and a ``np.timedelta64`` for scalar input. - It will now return a ``TimedeltaIndex`` for list-like input, ``Series`` for Series input, and ``Timedelta`` for scalar input. - - The arguments to ``pd.to_timedelta`` are now ``(arg, unit='ns', box=True)``, previously were ``(arg, box=True, unit='ns')`` as these are more logical. - -Using the top-level ``pd.to_timedelta``, you can convert a scalar, array, list, or Series from a recognized timedelta format / value into a ``Timedelta`` type. -It will construct Series if the input is a Series, a scalar if the input is scalar-like, otherwise will output a ``TimedeltaIndex``. +Using the top-level ``pd.to_timedelta``, you can convert a scalar, array, list, +or Series from a recognized timedelta format / value into a ``Timedelta`` type. +It will construct a Series if the input is a Series, a scalar if the input is +scalar-like, otherwise it will output a ``TimedeltaIndex``. You can parse a single string to a Timedelta: @@ -328,8 +322,6 @@ You can convert a ``Timedelta`` to an `ISO 8601 Duration`_ string with the TimedeltaIndex -------------- -.. versionadded:: 0.15.0 - To generate an index with time delta, you can use either the ``TimedeltaIndex`` or the ``timedelta_range`` constructor. diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 839390c8778aa..b5a261e3acac5 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -229,8 +229,6 @@ To get horizontal bar plots, use the ``barh`` method: Histograms ~~~~~~~~~~ -.. versionadded:: 0.15.0 - Histogram can be drawn by using the :meth:`DataFrame.plot.hist` and :meth:`Series.plot.hist` methods. .. ipython:: python @@ -328,8 +326,6 @@ The ``by`` keyword can be specified to plot grouped histograms: Box Plots ~~~~~~~~~ -.. versionadded:: 0.15.0 - Boxplot can be drawn calling :meth:`Series.plot.box` and :meth:`DataFrame.plot.box`, or :meth:`DataFrame.boxplot` to visualize the distribution of values within each column. diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 33a6db18db3ca..636bb2dc3e60e 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -485,3 +485,4 @@ Other ^^^^^ - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) - Several ``NaT`` method docstrings (e.g. 
:func:`NaT.ctime`) were incorrect (:issue:`17327`) +- The documentation has had references to versions < v0.16 removed and cleaned up (:issue:`17442` & :issue:`17404`) From 24b440e67abb3b14856f0fd920141f5a6dcf83fd Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Thu, 7 Sep 2017 05:52:11 -0500 Subject: [PATCH 17/57] BUG: revert collision warning (#17298) --- doc/source/indexing.rst | 15 --------------- doc/source/whatsnew/v0.21.0.txt | 24 +++--------------------- pandas/core/generic.py | 8 ++------ pandas/tests/dtypes/test_generic.py | 5 ----- pandas/tests/io/test_pytables.py | 4 ++-- 5 files changed, 7 insertions(+), 49 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 88e62b5d301a3..8474116c38082 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -269,21 +269,6 @@ new column. In 0.21.0 and later, this will raise a ``UserWarning``: 1 2.0 2 3.0 -Similarly, it is possible to create a column with a name which collides with one of Pandas's -built-in methods or attributes, which can cause confusion later when attempting to access -that column as an attribute. This behavior now warns: - -.. code-block:: ipython - - In[4]: df['sum'] = [5., 7., 9.] - UserWarning: Column name 'sum' collides with a built-in method, which will cause unexpected attribute behavior - In[5]: df.sum - Out[5]: - - Slicing ranges -------------- diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 636bb2dc3e60e..fa00140fb4abd 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -67,8 +67,8 @@ Improved warnings when attempting to create columns ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ New users are often flummoxed by the relationship between column operations and attribute -access on ``DataFrame`` instances (:issue:`5904` & :issue:`7175`). Two specific instances -of this confusion include attempting to create a new column by setting into an attribute: +access on ``DataFrame`` instances (:issue:`7175`). One specific instance +of this confusion is attempting to create a new column by setting into an attribute: .. code-block:: ipython @@ -86,25 +86,7 @@ This does not raise any obvious exceptions, but also does not create a new colum 1 2.0 2 3.0 -The second source of confusion is creating a column whose name collides with a method or -attribute already in the instance namespace: - -.. code-block:: ipython - - In[4]: df['sum'] = [5., 7., 9.] - -This does not permit that column to be accessed as an attribute: - -.. code-block:: ipython - - In[5]: df.sum - Out[5]: - - -Both of these now raise a ``UserWarning`` about the potential for unexpected behavior. See :ref:`Attribute Access `. +Setting a list-like data structure into a new attribute now raises a ``UserWarning`` about the potential for unexpected behavior. See :ref:`Attribute Access `.
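As a rough sketch of the new behavior (output abbreviated; the warning text comes from the ``pandas/core/generic.py`` change in this patch):

.. code-block:: ipython

   In [4]: df = pd.DataFrame({'one': [1., 2., 3.]})

   In [5]: df.two = [4, 5, 6]
   UserWarning: Pandas doesn't allow columns to be created via a new attribute name - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute-access

.. 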
_whatsnew_0210.enhancements.other: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cdb08d8887e05..df5f1a8326acd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1905,10 +1905,6 @@ def _slice(self, slobj, axis=0, kind=None): return result def _set_item(self, key, value): - if isinstance(key, str) and callable(getattr(self, key, None)): - warnings.warn("Column name '{key}' collides with a built-in " - "method, which will cause unexpected attribute " - "behavior".format(key=key), stacklevel=3) self._data.set(key, value) self._clear_item_cache() @@ -3441,8 +3437,8 @@ def __setattr__(self, name, value): object.__setattr__(self, name, value) except (AttributeError, TypeError): if isinstance(self, ABCDataFrame) and (is_list_like(value)): - warnings.warn("Pandas doesn't allow Series to be assigned " - "into nonexistent columns - see " + warnings.warn("Pandas doesn't allow columns to be " + "created via a new attribute name - see " "https://pandas.pydata.org/pandas-docs/" "stable/indexing.html#attribute-access", stacklevel=2) diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 82444d6c94157..bd365f9c3281f 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -48,7 +48,6 @@ def test_abc_types(self): def test_setattr_warnings(): - # GH5904 - Suggestion: Warning for DataFrame colname-methodname clash # GH7175 - GOTCHA: You can't use dot notation to add a column... d = {'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']), 'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])} @@ -78,7 +77,3 @@ def test_setattr_warnings(): # warn when setting column to nonexistent name df.four = df.two + 2 assert df.four.sum() > df.two.sum() - - with tm.assert_produces_warning(UserWarning): - # warn when column has same name as method - df['sum'] = df.two diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index b5ecc4d34cd08..9c488cb2389be 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2011,7 +2011,7 @@ def check(obj, comparator): df['string'] = 'foo' df['float322'] = 1. df['float322'] = df['float322'].astype('float32') - df['boolean'] = df['float322'] > 0 + df['bool'] = df['float322'] > 0 df['time1'] = Timestamp('20130101') df['time2'] = Timestamp('20130102') check(df, tm.assert_frame_equal) @@ -2141,7 +2141,7 @@ def test_table_values_dtypes_roundtrip(self): df1['string'] = 'foo' df1['float322'] = 1. 
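        # (the column is created as float64 first and then downcast, since a
        # bare assignment of 1. would produce float64 rather than float32)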
df1['float322'] = df1['float322'].astype('float32') - df1['boolean'] = df1['float32'] > 0 + df1['bool'] = df1['float32'] > 0 df1['time1'] = Timestamp('20130101') df1['time2'] = Timestamp('20130102') From 8a8a4fd74dc1dd2804d5f605fcad47e6f0fd4b60 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 7 Sep 2017 04:28:12 -0700 Subject: [PATCH 18/57] cdef out dtype for _Timestamp._get_field (#17457) --- pandas/_libs/tslib.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index a7b33c669a8b8..7e009652f7f0c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1275,6 +1275,7 @@ cdef class _Timestamp(datetime): cpdef _get_field(self, field): cdef: int64_t val + ndarray[int32_t] out val = self._maybe_convert_value_to_local() out = get_date_field(np.array([val], dtype=np.int64), field) return int(out[0]) @@ -1282,6 +1283,7 @@ cdef class _Timestamp(datetime): cpdef _get_named_field(self, field): cdef: int64_t val + ndarray[object] out val = self._maybe_convert_value_to_local() out = get_date_name_field(np.array([val], dtype=np.int64), field) return out[0] @@ -1291,9 +1293,7 @@ cdef class _Timestamp(datetime): 'startingMonth', self.freq.kwds.get( 'month', 12)) if self.freq else 12 freqstr = self.freqstr if self.freq else None - val = self.value - if self.tz is not None and not _is_utc(self.tz): - val = tz_convert_single(self.value, 'UTC', self.tz) + val = self._maybe_convert_value_to_local() out = get_start_end_field( np.array([val], dtype=np.int64), field, freqstr, month_kw) return out[0] From 9dc01c4f9142908c4a7db5a3a0300685f6d43308 Mon Sep 17 00:00:00 2001 From: Sam Foo Date: Thu, 7 Sep 2017 07:35:40 -0400 Subject: [PATCH 19/57] DOC: Add Timestamp, Period, Timedelta, and Interval to api.rst (#17424) --- doc/source/api.rst | 195 ++++++++++++++++++++++++++++++++++++++++ pandas/_libs/period.pyx | 2 +- 2 files changed, 196 insertions(+), 1 deletion(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 12e6c7ad7f630..d34cec86638fb 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1599,6 +1599,201 @@ Conversion TimedeltaIndex.floor TimedeltaIndex.ceil +.. currentmodule:: pandas + +Scalars +------- + +Period +~~~~~~ +.. autosummary:: + :toctree: generated/ + + Period + +Attributes +~~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Period.day + Period.dayofweek + Period.dayofyear + Period.days_in_month + Period.daysinmonth + Period.end_time + Period.freq + Period.freqstr + Period.hour + Period.is_leap_year + Period.minute + Period.month + Period.now + Period.ordinal + Period.quarter + Period.qyear + Period.second + Period.start_time + Period.strftime + Period.week + Period.weekday + Period.weekofyear + Period.year + +Methods +~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Period.asfreq + Period.strftime + Period.to_timestamp + +Timestamp +~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Timestamp + +Properties +~~~~~~~~~~ +.. 
autosummary:: + :toctree: generated/ + + Timestamp.asm8 + Timestamp.day + Timestamp.dayofweek + Timestamp.dayofyear + Timestamp.days_in_month + Timestamp.daysinmonth + Timestamp.hour + Timestamp.is_leap_year + Timestamp.is_month_end + Timestamp.is_month_start + Timestamp.is_quarter_end + Timestamp.is_quarter_start + Timestamp.is_year_end + Timestamp.is_year_start + Timestamp.max + Timestamp.microsecond + Timestamp.min + Timestamp.month + Timestamp.nanosecond + Timestamp.quarter + Timestamp.resolution + Timestamp.second + Timestamp.tz + Timestamp.tzinfo + Timestamp.value + Timestamp.weekday_name + Timestamp.weekofyear + Timestamp.year + +Methods +~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Timestamp.astimezone + Timestamp.ceil + Timestamp.combine + Timestamp.ctime + Timestamp.date + Timestamp.dst + Timestamp.floor + Timestamp.freq + Timestamp.freqstr + Timestamp.from_ordinal + Timestamp.fromtimestamp + Timestamp.isocalendar + Timestamp.isoformat + Timestamp.isoweekday + Timestamp.normalize + Timestamp.now + Timestamp.replace + Timestamp.round + Timestamp.strftime + Timestamp.strptime + Timestamp.time + Timestamp.timetuple + Timestamp.timetz + Timestamp.to_datetime64 + Timestamp.to_julian_date + Timestamp.to_period + Timestamp.to_pydatetime + Timestamp.today + Timestamp.toordinal + Timestamp.tz_convert + Timestamp.tz_localize + Timestamp.tzname + Timestamp.utcfromtimestamp + Timestamp.utcnow + Timestamp.utcoffset + Timestamp.utctimetuple + Timestamp.weekday + +Interval +~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Interval + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree generated/ + + Interval.closed + Interval.closed_left + Interval.closed_right + Interval.left + Interval.mid + Interval.open_left + Interval.open_right + Interval.right + +Timedelta +~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Timedelta + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree generated/ + + Timedelta.asm8 + Timedelta.components + Timedelta.days + Timedelta.delta + Timedelta.freq + Timedelta.is_populated + Timedelta.max + Timedelta.microseconds + Timedelta.min + Timedelta.nanoseconds + Timedelta.resolution + Timedelta.seconds + Timedelta.value + +Methods +~~~~~~~ +.. autosummary:: + :toctree generated/ + + Timedelta.ceil + Timedelta.floor + Timedelta.isoformat + Timedelta.round + Timdelta.to_pytimedelta + Timedelta.to_timedelta64 + Timedelta.total_seconds + Timedelta.view + Window ------ .. 
currentmodule:: pandas.core.window diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 0ade8f9a6dde5..8f89b812fec04 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -1102,7 +1102,7 @@ cdef class _Period(object): class Period(_Period): """ - Represents an period of time + Represents a period of time Parameters ---------- From aee2ae086e0972aabcb43d05fa2a404153e3b3b5 Mon Sep 17 00:00:00 2001 From: majiang Date: Thu, 7 Sep 2017 20:41:24 +0900 Subject: [PATCH 20/57] DOC: to_json (#17461) --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index df5f1a8326acd..8d16b079ba2c8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1265,7 +1265,7 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None, Parameters ---------- path_or_buf : the path or buffer to write the result string - if this is None, return a StringIO of the converted string + if this is None, return the converted string orient : string * Series From 3a291bb7170ca900cb1b886a3c0b39976a9870ef Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 7 Sep 2017 05:49:27 -0600 Subject: [PATCH 21/57] BUG: Index._searchsorted_monotonic(..., side='right') returns the left side position for monotonic decreasing indexes (#17272) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/base.py | 2 +- pandas/tests/indexes/common.py | 59 +++++++++++++++++-- .../indexes/datetimes/test_datetimelike.py | 4 +- pandas/tests/indexes/period/test_period.py | 4 +- pandas/tests/indexes/test_base.py | 3 +- pandas/tests/indexes/test_numeric.py | 12 ++-- pandas/tests/indexes/test_range.py | 3 +- pandas/tests/indexing/test_interval.py | 56 +++++++++++------- 9 files changed, 111 insertions(+), 33 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index fa00140fb4abd..d3c61adccc7a6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -400,6 +400,7 @@ Indexing - Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) - Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`) - Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`) +- Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`) I/O ^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a9098126a38e3..ef5f68936044a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3465,7 +3465,7 @@ def _searchsorted_monotonic(self, label, side='left'): # everything for it to work (element ordering, search side and # resulting value). 
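            # For example, on the decreasing index [5, 3, 1], looking up 3 with
            # side='right' reverses to [1, 3, 5], searches that with the flipped
            # side 'left' to get pos=1, and returns len(self) - 1 == 2 -- one
            # position past the 3 in the original ordering.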
pos = self[::-1].searchsorted(label, side='right' if side == 'left' - else 'right') + else 'left') return len(self) - pos raise ValueError('index must be monotonic increasing or decreasing') diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 1fdc08d68eb26..90618cd6e235f 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -11,6 +11,7 @@ RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, IntervalIndex, notna, isna) +from pandas.core.indexes.base import InvalidIndexError from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas.core.dtypes.common import needs_i8_conversion from pandas._libs.tslib import iNaT @@ -138,9 +139,14 @@ def test_get_indexer_consistency(self): if isinstance(index, IntervalIndex): continue - indexer = index.get_indexer(index[0:2]) - assert isinstance(indexer, np.ndarray) - assert indexer.dtype == np.intp + if index.is_unique or isinstance(index, CategoricalIndex): + indexer = index.get_indexer(index[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + else: + e = "Reindexing only valid with uniquely valued Index objects" + with tm.assert_raises_regex(InvalidIndexError, e): + indexer = index.get_indexer(index[0:2]) indexer, _ = index.get_indexer_non_unique(index[0:2]) assert isinstance(indexer, np.ndarray) @@ -632,7 +638,8 @@ def test_difference_base(self): pass elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): assert result.__class__ == answer.__class__ - tm.assert_numpy_array_equal(result.asi8, answer.asi8) + tm.assert_numpy_array_equal(result.sort_values().asi8, + answer.sort_values().asi8) else: result = first.difference(case) assert tm.equalContents(result, answer) @@ -954,3 +961,47 @@ def test_join_self_unique(self, how): if index.is_unique: joined = index.join(index, how=how) assert (index == joined).all() + + def test_searchsorted_monotonic(self): + # GH17271 + for index in self.indices.values(): + # not implemented for tuple searches in MultiIndex + # or Intervals searches in IntervalIndex + if isinstance(index, (MultiIndex, IntervalIndex)): + continue + + # nothing to test if the index is empty + if index.empty: + continue + value = index[0] + + # determine the expected results (handle dupes for 'right') + expected_left, expected_right = 0, (index == value).argmin() + if expected_right == 0: + # all values are the same, expected_right should be length + expected_right = len(index) + + # test _searchsorted_monotonic in all cases + # test searchsorted only for increasing + if index.is_monotonic_increasing: + ssm_left = index._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = index._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right + + ss_left = index.searchsorted(value, side='left') + assert expected_left == ss_left + + ss_right = index.searchsorted(value, side='right') + assert expected_right == ss_right + elif index.is_monotonic_decreasing: + ssm_left = index._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = index._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right + else: + # non-monotonic should raise. 
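+                # (a bisection position is only well-defined when the index is
+                # sorted in one direction or the other)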
+ with pytest.raises(ValueError): + index._searchsorted_monotonic(value, side='left') diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index 3b970ee382521..538e10e6011ec 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -12,7 +12,9 @@ class TestDatetimeIndex(DatetimeLike): _holder = DatetimeIndex def setup_method(self, method): - self.indices = dict(index=tm.makeDateIndex(10)) + self.indices = dict(index=tm.makeDateIndex(10), + index_dec=date_range('20130110', periods=10, + freq='-1D')) self.setup_indices() def create_index(self): diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index e24e2ad936e2c..51f7d13cb0638 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -18,7 +18,9 @@ class TestPeriodIndex(DatetimeLike): _multiprocess_can_split_ = True def setup_method(self, method): - self.indices = dict(index=tm.makePeriodIndex(10)) + self.indices = dict(index=tm.makePeriodIndex(10), + index_dec=period_range('20130101', periods=10, + freq='D')[::-1]) self.setup_indices() def create_index(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index f96dbdcfb8acf..d69fbbcdf4bf6 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -46,7 +46,8 @@ def setup_method(self, method): catIndex=tm.makeCategoricalIndex(100), empty=Index([]), tuples=MultiIndex.from_tuples(lzip( - ['foo', 'bar', 'baz'], [1, 2, 3]))) + ['foo', 'bar', 'baz'], [1, 2, 3])), + repeats=Index([0, 0, 1, 1, 2, 2])) self.setup_indices() def create_index(self): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 1a0a38c173284..7e7e10e4aeabe 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -181,7 +181,9 @@ class TestFloat64Index(Numeric): def setup_method(self, method): self.indices = dict(mixed=Float64Index([1.5, 2, 3, 4, 5]), - float=Float64Index(np.arange(5) * 2.5)) + float=Float64Index(np.arange(5) * 2.5), + mixed_dec=Float64Index([5, 4, 3, 2, 1.5]), + float_dec=Float64Index(np.arange(4, -1, -1) * 2.5)) self.setup_indices() def create_index(self): @@ -654,7 +656,8 @@ class TestInt64Index(NumericInt): _holder = Int64Index def setup_method(self, method): - self.indices = dict(index=Int64Index(np.arange(0, 20, 2))) + self.indices = dict(index=Int64Index(np.arange(0, 20, 2)), + index_dec=Int64Index(np.arange(19, -1, -1))) self.setup_indices() def create_index(self): @@ -949,8 +952,9 @@ class TestUInt64Index(NumericInt): _holder = UInt64Index def setup_method(self, method): - self.indices = dict(index=UInt64Index([2**63, 2**63 + 10, 2**63 + 15, - 2**63 + 20, 2**63 + 25])) + vals = [2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 + 25] + self.indices = dict(index=UInt64Index(vals), + index_dec=UInt64Index(reversed(vals))) self.setup_indices() def create_index(self): diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 06c8f0ee392c7..d206c36ee51c9 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -25,7 +25,8 @@ class TestRangeIndex(Numeric): _compat_props = ['shape', 'ndim', 'size', 'itemsize'] def setup_method(self, method): - self.indices = dict(index=RangeIndex(0, 20, 2, name='foo')) + self.indices = dict(index=RangeIndex(0, 20, 2, name='foo'), + 
index_dec=RangeIndex(18, -1, -2, name='bar')) self.setup_indices() def create_index(self): diff --git a/pandas/tests/indexing/test_interval.py b/pandas/tests/indexing/test_interval.py index be6e5e1cffb2e..31a94abcd99a5 100644 --- a/pandas/tests/indexing/test_interval.py +++ b/pandas/tests/indexing/test_interval.py @@ -3,6 +3,7 @@ import pandas as pd from pandas import Series, DataFrame, IntervalIndex, Interval +from pandas.compat import product import pandas.util.testing as tm @@ -14,16 +15,6 @@ def setup_method(self, method): def test_loc_with_scalar(self): s = self.s - expected = 0 - - result = s.loc[0.5] - assert result == expected - - result = s.loc[1] - assert result == expected - - with pytest.raises(KeyError): - s.loc[0] expected = s.iloc[:3] tm.assert_series_equal(expected, s.loc[:3]) @@ -42,16 +33,6 @@ def test_loc_with_scalar(self): def test_getitem_with_scalar(self): s = self.s - expected = 0 - - result = s[0.5] - assert result == expected - - result = s[1] - assert result == expected - - with pytest.raises(KeyError): - s[0] expected = s.iloc[:3] tm.assert_series_equal(expected, s[:3]) @@ -67,6 +48,41 @@ def test_getitem_with_scalar(self): expected = s.iloc[2:5] tm.assert_series_equal(expected, s[s >= 2]) + @pytest.mark.parametrize('direction, closed', + product(('increasing', 'decreasing'), + ('left', 'right', 'neither', 'both'))) + def test_nonoverlapping_monotonic(self, direction, closed): + tpls = [(0, 1), (2, 3), (4, 5)] + if direction == 'decreasing': + tpls = reversed(tpls) + + idx = IntervalIndex.from_tuples(tpls, closed=closed) + s = Series(list('abc'), idx) + + for key, expected in zip(idx.left, s): + if idx.closed_left: + assert s[key] == expected + assert s.loc[key] == expected + else: + with pytest.raises(KeyError): + s[key] + with pytest.raises(KeyError): + s.loc[key] + + for key, expected in zip(idx.right, s): + if idx.closed_right: + assert s[key] == expected + assert s.loc[key] == expected + else: + with pytest.raises(KeyError): + s[key] + with pytest.raises(KeyError): + s.loc[key] + + for key, expected in zip(idx.mid, s): + assert s[key] == expected + assert s.loc[key] == expected + def test_with_interval(self): s = self.s From ee6185e2fb9461632949f3ba52a28b37a1f7296e Mon Sep 17 00:00:00 2001 From: Matti Picus Date: Thu, 7 Sep 2017 14:56:33 +0300 Subject: [PATCH 22/57] COMPAT: Pypy tweaks (#17351) --- doc/source/whatsnew/v0.21.0.txt | 11 ++++- pandas/_libs/src/ujson/python/JSONtoObj.c | 16 +++---- pandas/io/parsers.py | 1 + pandas/tests/indexes/test_base.py | 16 +++++-- pandas/tests/indexes/test_multi.py | 13 +++++- pandas/tests/io/parser/test_parsers.py | 52 ++++++++++++++++++++++- 6 files changed, 92 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d3c61adccc7a6..f50052347cfb5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -371,13 +371,11 @@ Performance Improvements Bug Fixes ~~~~~~~~~ - Conversion ^^^^^^^^^^ - Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`) - Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) -- Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, so an approximation is used instead (:issue:`17228`) - Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. 
Previously returned a ``numpy.bool_``. (:issue:`17237`) - Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`) - Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`) @@ -463,6 +461,15 @@ Categorical the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`) +PyPy +^^^^ + +- Compatibility with PyPy in :func:`read_csv` with ``usecols=[]`` and + :func:`read_json` (:issue:`17351`) +- Split tests into cases for CPython and PyPy where needed, which highlights the fragility + of index matching with ``float('nan')``, ``np.nan`` and ``NAT`` (:issue:`17351`) +- Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, + so an approximation is used instead (:issue:`17228`) Other ^^^^^ diff --git a/pandas/_libs/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c index b0132532c16af..85cf1d5e5e7a1 100644 --- a/pandas/_libs/src/ujson/python/JSONtoObj.c +++ b/pandas/_libs/src/ujson/python/JSONtoObj.c @@ -409,7 +409,7 @@ JSOBJ Object_npyEndObject(void *prv, JSOBJ obj) { } int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { - PyObject *label; + PyObject *label, *labels; npy_intp labelidx; // add key to label array, value to values array NpyArrContext *npyarr = (NpyArrContext *)obj; @@ -424,11 +424,11 @@ int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { if (!npyarr->labels[labelidx]) { npyarr->labels[labelidx] = PyList_New(0); } - + labels = npyarr->labels[labelidx]; // only fill label array once, assumes all column labels are the same // for 2-dimensional arrays. - if (PyList_GET_SIZE(npyarr->labels[labelidx]) <= npyarr->elcount) { - PyList_Append(npyarr->labels[labelidx], label); + if (PyList_Check(labels) && PyList_GET_SIZE(labels) <= npyarr->elcount) { + PyList_Append(labels, label); } if (((JSONObjectDecoder *)npyarr->dec)->arrayAddItem(prv, obj, value)) { @@ -439,16 +439,16 @@ int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { } int Object_objectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { - PyDict_SetItem(obj, name, value); + int ret = PyDict_SetItem(obj, name, value); Py_DECREF((PyObject *)name); Py_DECREF((PyObject *)value); - return 1; + return ret == 0 ? 1 : 0; } int Object_arrayAddItem(void *prv, JSOBJ obj, JSOBJ value) { - PyList_Append(obj, value); + int ret = PyList_Append(obj, value); Py_DECREF((PyObject *)value); - return 1; + return ret == 0 ? 1 : 0; } JSOBJ Object_newString(void *prv, wchar_t *start, wchar_t *end) { diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 8b1a921536a1d..6adf154aabba7 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1716,6 +1716,7 @@ def _set_noconvert_columns(self): # A set of integers will be converted to a list in # the correct order every single time. 
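                # On other interpreters that assumption does not hold: on PyPy,
                # for instance, iterating a set such as {3, 0, 2} need not yield
                # 0, 2, 3, which is why the list is sorted explicitly below.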
usecols = list(self.usecols) + usecols.sort() elif (callable(self.usecols) or self.usecols_dtype not in ('empty', None)): # The names attribute should have the correct columns diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index d69fbbcdf4bf6..fa73c9fc7b722 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -9,7 +9,7 @@ from pandas.tests.indexes.common import Base from pandas.compat import (range, lrange, lzip, u, - text_type, zip, PY3, PY36) + text_type, zip, PY3, PY36, PYPY) import operator import numpy as np @@ -1370,13 +1370,21 @@ def test_isin(self): assert len(result) == 0 assert result.dtype == np.bool_ - def test_isin_nan(self): + @pytest.mark.skipif(PYPY, reason="np.nan is float('nan') on PyPy") + def test_isin_nan_not_pypy(self): + tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([float('nan')]), + np.array([False, False])) + + @pytest.mark.skipif(not PYPY, reason="np.nan is float('nan') on PyPy") + def test_isin_nan_pypy(self): + tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([float('nan')]), + np.array([False, True])) + + def test_isin_nan_common(self): tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([np.nan]), np.array([False, True])) tm.assert_numpy_array_equal(Index(['a', pd.NaT]).isin([pd.NaT]), np.array([False, True])) - tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([float('nan')]), - np.array([False, False])) tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([pd.NaT]), np.array([False, False])) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 798d244468961..86308192c9166 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -14,7 +14,7 @@ from pandas import (CategoricalIndex, DataFrame, Index, MultiIndex, compat, date_range, period_range) -from pandas.compat import PY3, long, lrange, lzip, range, u +from pandas.compat import PY3, long, lrange, lzip, range, u, PYPY from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.core.indexes.base import InvalidIndexError from pandas._libs import lib @@ -2571,13 +2571,22 @@ def test_isin(self): assert len(result) == 0 assert result.dtype == np.bool_ - def test_isin_nan(self): + @pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy") + def test_isin_nan_not_pypy(self): idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), np.array([False, False])) tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), np.array([False, False])) + @pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy") + def test_isin_nan_pypy(self): + idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) + tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), + np.array([False, True])) + tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), + np.array([False, True])) + def test_isin_level_kwarg(self): idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( 4)]) diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py index 2fee2451c5e36..0ea4757b10e94 100644 --- a/pandas/tests/io/parser/test_parsers.py +++ b/pandas/tests/io/parser/test_parsers.py @@ -3,8 +3,10 @@ import os import pandas.util.testing as tm -from pandas import read_csv, read_table +from pandas import read_csv, read_table, DataFrame from pandas.core.common import AbstractMethodError +from pandas._libs.lib import Timestamp +from 
pandas.compat import StringIO from .common import ParserTests from .header import HeaderTests @@ -100,3 +102,51 @@ def read_table(self, *args, **kwds): kwds = kwds.copy() kwds['engine'] = self.engine return read_table(*args, **kwds) + + +class TestUnsortedUsecols(object): + def test_override__set_noconvert_columns(self): + # GH 17351 - usecols needs to be sorted in _setnoconvert_columns + # based on the test_usecols_with_parse_dates test from usecols.py + from pandas.io.parsers import CParserWrapper, TextFileReader + + s = """a,b,c,d,e + 0,1,20140101,0900,4 + 0,1,20140102,1000,4""" + + parse_dates = [[1, 2]] + cols = { + 'a': [0, 0], + 'c_d': [ + Timestamp('2014-01-01 09:00:00'), + Timestamp('2014-01-02 10:00:00') + ] + } + expected = DataFrame(cols, columns=['c_d', 'a']) + + class MyTextFileReader(TextFileReader): + def __init__(self): + self._currow = 0 + self.squeeze = False + + class MyCParserWrapper(CParserWrapper): + def _set_noconvert_columns(self): + if self.usecols_dtype == 'integer': + # self.usecols is a set, which is documented as unordered + # but in practice, a CPython set of integers is sorted. + # In other implementations this assumption does not hold. + # The following code simulates a different order, which + # before GH 17351 would cause the wrong columns to be + # converted via the parse_dates parameter + self.usecols = list(self.usecols) + self.usecols.reverse() + return CParserWrapper._set_noconvert_columns(self) + + parser = MyTextFileReader() + parser.options = {'usecols': [0, 2, 3], + 'parse_dates': parse_dates, + 'delimiter': ','} + parser._engine = MyCParserWrapper(StringIO(s), **parser.options) + df = parser.read() + + tm.assert_frame_equal(df, expected) From 46832ac8f465aa911ba79ebc1b1a4d0f6baf46f9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 7 Sep 2017 17:46:12 -0700 Subject: [PATCH 23/57] Replace * imports with explicit imports; remove unused declared constants (#17470) --- pandas/_libs/src/skiplist.pyx | 1 - pandas/_libs/window.pyx | 38 ++++++----------------------------- 2 files changed, 6 insertions(+), 33 deletions(-) diff --git a/pandas/_libs/src/skiplist.pyx b/pandas/_libs/src/skiplist.pyx index 559b529822a69..1524dca38d0e0 100644 --- a/pandas/_libs/src/skiplist.pyx +++ b/pandas/_libs/src/skiplist.pyx @@ -15,7 +15,6 @@ cdef double Log2(double x): return log(x) / log(2.) 
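# (the wildcard cimport removed below pulled every numpy symbol into this
# module's namespace; nothing in skiplist relied on them, so no explicit
# replacements are needed)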
cimport numpy as np -from numpy cimport * import numpy as np from random import random diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 9fb3d0662eb4f..b6bd6f92f6199 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1,55 +1,29 @@ # cython: profile=False # cython: boundscheck=False, wraparound=False, cdivision=True -from numpy cimport * +from cython cimport Py_ssize_t + cimport numpy as np import numpy as np cimport cython -import_array() +np.import_array() cimport util from libc.stdlib cimport malloc, free -from numpy cimport NPY_INT8 as NPY_int8 -from numpy cimport NPY_INT16 as NPY_int16 -from numpy cimport NPY_INT32 as NPY_int32 -from numpy cimport NPY_INT64 as NPY_int64 -from numpy cimport NPY_FLOAT16 as NPY_float16 -from numpy cimport NPY_FLOAT32 as NPY_float32 -from numpy cimport NPY_FLOAT64 as NPY_float64 - -from numpy cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, - uint32_t, uint64_t, float16_t, float32_t, float64_t) - -int8 = np.dtype(np.int8) -int16 = np.dtype(np.int16) -int32 = np.dtype(np.int32) -int64 = np.dtype(np.int64) -float16 = np.dtype(np.float16) -float32 = np.dtype(np.float32) -float64 = np.dtype(np.float64) - -cdef np.int8_t MINint8 = np.iinfo(np.int8).min -cdef np.int16_t MINint16 = np.iinfo(np.int16).min -cdef np.int32_t MINint32 = np.iinfo(np.int32).min -cdef np.int64_t MINint64 = np.iinfo(np.int64).min -cdef np.float16_t MINfloat16 = np.NINF + +from numpy cimport ndarray, double_t, int64_t, float64_t + cdef np.float32_t MINfloat32 = np.NINF cdef np.float64_t MINfloat64 = np.NINF -cdef np.int8_t MAXint8 = np.iinfo(np.int8).max -cdef np.int16_t MAXint16 = np.iinfo(np.int16).max -cdef np.int32_t MAXint32 = np.iinfo(np.int32).max -cdef np.int64_t MAXint64 = np.iinfo(np.int64).max -cdef np.float16_t MAXfloat16 = np.inf cdef np.float32_t MAXfloat32 = np.inf cdef np.float64_t MAXfloat64 = np.inf cdef double NaN = np.NaN -cdef double nan = NaN cdef inline int int_max(int a, int b): return a if a >= b else b cdef inline int int_min(int a, int b): return a if a <= b else b From 9c4e4c8959853c7cda554d8e9b530efdd8ef9cb1 Mon Sep 17 00:00:00 2001 From: Sam Foo Date: Thu, 7 Sep 2017 20:47:52 -0400 Subject: [PATCH 24/57] Removed Timedelta.is_populated and fixed spelling errors (#17469) --- doc/source/api.rst | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index d34cec86638fb..c32a541d19605 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1704,7 +1704,7 @@ Methods Timestamp.floor Timestamp.freq Timestamp.freqstr - Timestamp.from_ordinal + Timestamp.fromordinal Timestamp.fromtimestamp Timestamp.isocalendar Timestamp.isoformat @@ -1769,9 +1769,7 @@ Properties Timedelta.asm8 Timedelta.components Timedelta.days - Timedelta.delta Timedelta.freq - Timedelta.is_populated Timedelta.max Timedelta.microseconds Timedelta.min @@ -1789,10 +1787,9 @@ Methods Timedelta.floor Timedelta.isoformat Timedelta.round - Timdelta.to_pytimedelta + Timedelta.to_pytimedelta Timedelta.to_timedelta64 Timedelta.total_seconds - Timedelta.view Window ------ From 7e4e8acf5b5d68b3dfadecd3ba816d4f0b9be0ce Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 7 Sep 2017 18:00:05 -0700 Subject: [PATCH 25/57] PERF: Implement get_freq_code in cython frequencies (#17422) --- asv_bench/benchmarks/period.py | 29 ++++ pandas/_libs/tslibs/__init__.py | 0 pandas/_libs/tslibs/frequencies.pyx | 201 ++++++++++++++++++++++++++++ pandas/tseries/frequencies.py | 79 +---------- setup.py | 4 + 5 files 
changed, 235 insertions(+), 78 deletions(-) create mode 100644 pandas/_libs/tslibs/__init__.py create mode 100644 pandas/_libs/tslibs/frequencies.pyx diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index f9837191a7bae..78d66295f28cc 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -2,6 +2,35 @@ from pandas import Series, Period, PeriodIndex, date_range +class PeriodProperties(object): + def setup(self): + self.per = Period('2012-06-01', freq='M') + + def time_year(self): + self.per.year + + def time_month(self): + self.per.month + + def time_quarter(self): + self.per.quarter + + def time_day(self): + self.per.day + + def time_hour(self): + self.per.hour + + def time_minute(self): + self.per.minute + + def time_second(self): + self.per.second + + def time_leap_year(self): + self.per.is_leap_year + + class Constructor(object): goal_time = 0.2 diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx new file mode 100644 index 0000000000000..35429e8ae87f0 --- /dev/null +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -0,0 +1,201 @@ +# -*- coding: utf-8 -*- +# cython: profile=False +import re + +cimport cython + +import numpy as np +cimport numpy as np +np.import_array() + +from util cimport is_integer_object + + +cpdef get_freq_code(freqstr): + """ + Return freq str or tuple to freq code and stride (mult) + + Parameters + ---------- + freqstr : str or tuple + + Returns + ------- + return : tuple of base frequency code and stride (mult) + + Example + ------- + >>> get_freq_code('3D') + (6000, 3) + + >>> get_freq_code('D') + (6000, 1) + + >>> get_freq_code(('D', 3)) + (6000, 3) + """ + if getattr(freqstr, '_typ', None) == 'dateoffset': + freqstr = (freqstr.rule_code, freqstr.n) + + if isinstance(freqstr, tuple): + if (is_integer_object(freqstr[0]) and + is_integer_object(freqstr[1])): + # e.g., freqstr = (2000, 1) + return freqstr + else: + # e.g., freqstr = ('T', 5) + try: + code = _period_str_to_code(freqstr[0]) + stride = freqstr[1] + except: + if is_integer_object(freqstr[1]): + raise + code = _period_str_to_code(freqstr[1]) + stride = freqstr[0] + return code, stride + + if is_integer_object(freqstr): + return (freqstr, 1) + + base, stride = _base_and_stride(freqstr) + code = _period_str_to_code(base) + + return code, stride + + +# hack to handle WOM-1MON +opattern = re.compile( + r'([\-]?\d*|[\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)' +) + + +cpdef _base_and_stride(freqstr): + """ + Return base freq and stride info from string representation + + Examples + -------- + _freq_and_stride('5Min') -> 'Min', 5 + """ + groups = opattern.match(freqstr) + + if not groups: + raise ValueError("Could not evaluate {freq}".format(freq=freqstr)) + + stride = groups.group(1) + + if len(stride): + stride = int(stride) + else: + stride = 1 + + base = groups.group(2) + + return (base, stride) + + +# --------------------------------------------------------------------- +# Period codes + +# period frequency constants corresponding to scikits timeseries +# originals +_period_code_map = { + # Annual freqs with various fiscal year ends. 
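+    # (reading the table: the thousands digit of each code selects the
+    # frequency class -- 1000s annual, 2000s quarterly, 3000 monthly, 4000s
+    # weekly, down to 12000 for nanoseconds -- while the low digits select
+    # the anchor, e.g. the fiscal year-end month)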
+ # eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005 + "A-DEC": 1000, # Annual - December year end + "A-JAN": 1001, # Annual - January year end + "A-FEB": 1002, # Annual - February year end + "A-MAR": 1003, # Annual - March year end + "A-APR": 1004, # Annual - April year end + "A-MAY": 1005, # Annual - May year end + "A-JUN": 1006, # Annual - June year end + "A-JUL": 1007, # Annual - July year end + "A-AUG": 1008, # Annual - August year end + "A-SEP": 1009, # Annual - September year end + "A-OCT": 1010, # Annual - October year end + "A-NOV": 1011, # Annual - November year end + + # Quarterly frequencies with various fiscal year ends. + # eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005 + "Q-DEC": 2000, # Quarterly - December year end + "Q-JAN": 2001, # Quarterly - January year end + "Q-FEB": 2002, # Quarterly - February year end + "Q-MAR": 2003, # Quarterly - March year end + "Q-APR": 2004, # Quarterly - April year end + "Q-MAY": 2005, # Quarterly - May year end + "Q-JUN": 2006, # Quarterly - June year end + "Q-JUL": 2007, # Quarterly - July year end + "Q-AUG": 2008, # Quarterly - August year end + "Q-SEP": 2009, # Quarterly - September year end + "Q-OCT": 2010, # Quarterly - October year end + "Q-NOV": 2011, # Quarterly - November year end + + "M": 3000, # Monthly + + "W-SUN": 4000, # Weekly - Sunday end of week + "W-MON": 4001, # Weekly - Monday end of week + "W-TUE": 4002, # Weekly - Tuesday end of week + "W-WED": 4003, # Weekly - Wednesday end of week + "W-THU": 4004, # Weekly - Thursday end of week + "W-FRI": 4005, # Weekly - Friday end of week + "W-SAT": 4006, # Weekly - Saturday end of week + + "B": 5000, # Business days + "D": 6000, # Daily + "H": 7000, # Hourly + "T": 8000, # Minutely + "S": 9000, # Secondly + "L": 10000, # Millisecondly + "U": 11000, # Microsecondly + "N": 12000, # Nanosecondly +} + +# Yearly aliases; careful not to put these in _reverse_period_code_map +_period_code_map.update({'Y' + key[1:]: _period_code_map[key] + for key in _period_code_map + if key.startswith('A-')}) + +_period_code_map.update({ + "Q": 2000, # Quarterly - December year end (default quarterly) + "A": 1000, # Annual + "W": 4000, # Weekly + "C": 5000, # Custom Business Day + }) + +_dont_uppercase = set(('MS', 'ms')) + +_lite_rule_alias = { + 'W': 'W-SUN', + 'Q': 'Q-DEC', + + 'A': 'A-DEC', # YearEnd(month=12), + 'Y': 'A-DEC', + 'AS': 'AS-JAN', # YearBegin(month=1), + 'YS': 'AS-JAN', + 'BA': 'BA-DEC', # BYearEnd(month=12), + 'BY': 'BA-DEC', + 'BAS': 'BAS-JAN', # BYearBegin(month=1), + 'BYS': 'BAS-JAN', + + 'Min': 'T', + 'min': 'T', + 'ms': 'L', + 'us': 'U', + 'ns': 'N'} + +_INVALID_FREQ_ERROR = "Invalid frequency: {0}" + + +cpdef _period_str_to_code(freqstr): + freqstr = _lite_rule_alias.get(freqstr, freqstr) + + if freqstr not in _dont_uppercase: + lower = freqstr.lower() + freqstr = _lite_rule_alias.get(lower, freqstr) + + if freqstr not in _dont_uppercase: + freqstr = freqstr.upper() + try: + return _period_code_map[freqstr] + except KeyError: + raise ValueError(_INVALID_FREQ_ERROR.format(freqstr)) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 7f34bcaf52926..6644a33245a84 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -8,7 +8,6 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.common import ( - is_integer, is_period_arraylike, is_timedelta64_dtype, is_datetime64_dtype) @@ -21,6 +20,7 @@ from pandas._libs import lib, tslib from pandas._libs.tslib import Timedelta +from 
pandas._libs.tslibs.frequencies import get_freq_code, _base_and_stride from pytz import AmbiguousTimeError @@ -298,58 +298,6 @@ def get_freq(freq): return freq -def get_freq_code(freqstr): - """ - Return freq str or tuple to freq code and stride (mult) - - Parameters - ---------- - freqstr : str or tuple - - Returns - ------- - return : tuple of base frequency code and stride (mult) - - Example - ------- - >>> get_freq_code('3D') - (6000, 3) - - >>> get_freq_code('D') - (6000, 1) - - >>> get_freq_code(('D', 3)) - (6000, 3) - """ - if isinstance(freqstr, DateOffset): - freqstr = (freqstr.rule_code, freqstr.n) - - if isinstance(freqstr, tuple): - if (is_integer(freqstr[0]) and - is_integer(freqstr[1])): - # e.g., freqstr = (2000, 1) - return freqstr - else: - # e.g., freqstr = ('T', 5) - try: - code = _period_str_to_code(freqstr[0]) - stride = freqstr[1] - except: - if is_integer(freqstr[1]): - raise - code = _period_str_to_code(freqstr[1]) - stride = freqstr[0] - return code, stride - - if is_integer(freqstr): - return (freqstr, 1) - - base, stride = _base_and_stride(freqstr) - code = _period_str_to_code(base) - - return code, stride - - def _get_freq_str(base, mult=1): code = _reverse_period_code_map.get(base) if mult == 1: @@ -577,31 +525,6 @@ def to_offset(freq): ) -def _base_and_stride(freqstr): - """ - Return base freq and stride info from string representation - - Examples - -------- - _freq_and_stride('5Min') -> 'Min', 5 - """ - groups = opattern.match(freqstr) - - if not groups: - raise ValueError("Could not evaluate {freq}".format(freq=freqstr)) - - stride = groups.group(1) - - if len(stride): - stride = int(stride) - else: - stride = 1 - - base = groups.group(2) - - return (base, stride) - - def get_base_alias(freqstr): """ Returns the base frequency alias, e.g., '5D' -> 'D' diff --git a/setup.py b/setup.py index 444db5bc4d275..4e326beefa908 100755 --- a/setup.py +++ b/setup.py @@ -341,6 +341,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/window.pyx', 'pandas/_libs/sparse.pyx', 'pandas/_libs/parsers.pyx', + 'panads/_libs/tslibs/frequencies.pyx', 'pandas/io/sas/sas.pyx'] def initialize_options(self): @@ -492,6 +493,8 @@ def pxd(name): 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c', 'pandas/_libs/src/period_helper.c']}, + '_libs.tslibs.frequencies': {'pyxfile': '_libs/tslibs/frequencies', + 'pxdfiles': ['_libs/src/util']}, '_libs.index': {'pyxfile': '_libs/index', 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c'], @@ -653,6 +656,7 @@ def pxd(name): 'pandas.io.formats', 'pandas.io.clipboard', 'pandas._libs', + 'pandas._libs.tslibs', 'pandas.plotting', 'pandas.stats', 'pandas.types', From 3ccb88c912d898b2fd8decd3d988aca264e4e820 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 8 Sep 2017 03:05:05 -0700 Subject: [PATCH 26/57] Fix typo in setup.py introduced by 17422 (#17473) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4e326beefa908..3269fe7972cf0 100755 --- a/setup.py +++ b/setup.py @@ -341,7 +341,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/window.pyx', 'pandas/_libs/sparse.pyx', 'pandas/_libs/parsers.pyx', - 'panads/_libs/tslibs/frequencies.pyx', + 'pandas/_libs/tslibs/frequencies.pyx', 'pandas/io/sas/sas.pyx'] def initialize_options(self): From d6df8ea99f2574480e934aae01a1e142f935145e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 8 Sep 2017 03:16:13 -0700 Subject: [PATCH 27/57] Follow up 
to #17422 (#17472) --- pandas/_libs/period.pyx | 55 ++++++------ pandas/_libs/tslibs/frequencies.pxd | 4 + pandas/_libs/tslibs/frequencies.pyx | 3 + pandas/tseries/frequencies.py | 128 +--------------------------- 4 files changed, 38 insertions(+), 152 deletions(-) create mode 100644 pandas/_libs/tslibs/frequencies.pxd diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 8f89b812fec04..e2a3baa8d6e8b 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -10,17 +10,16 @@ from cpython cimport ( from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray, NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA) import numpy as np +import_array() from libc.stdlib cimport free -from pandas import compat from pandas.compat import PY2 cimport cython from datetime cimport ( is_leapyear, - PyDateTime_IMPORT, pandas_datetimestruct, pandas_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, @@ -29,6 +28,7 @@ from datetime cimport ( cimport util, lib +from util cimport is_period_object, is_string_object from lib cimport is_null_datetimelike, is_period from pandas._libs import tslib, lib @@ -41,6 +41,8 @@ from tslib cimport ( _get_dst_info, _nat_scalar_rules) +from tslibs.frequencies cimport get_freq_code + from pandas.tseries import offsets from pandas.core.tools.datetimes import parse_time_string from pandas.tseries import frequencies @@ -329,8 +331,6 @@ cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^", "^`GH`^", "^`IJ`^", "^`KL`^"] cdef object _period_strftime(int64_t value, int freq, object fmt): - import sys - cdef: Py_ssize_t i date_info dinfo @@ -683,7 +683,7 @@ cdef class _Period(object): def _maybe_convert_freq(cls, object freq): if isinstance(freq, (int, tuple)): - code, stride = frequencies.get_freq_code(freq) + code, stride = get_freq_code(freq) freq = frequencies._get_freq_str(code, stride) freq = frequencies.to_offset(freq) @@ -707,7 +707,7 @@ cdef class _Period(object): return self def __richcmp__(self, other, op): - if isinstance(other, Period): + if is_period_object(other): if other.freq != self.freq: msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) @@ -753,7 +753,7 @@ cdef class _Period(object): return NotImplemented def __add__(self, other): - if isinstance(self, Period): + if is_period_object(self): if isinstance(other, (timedelta, np.timedelta64, offsets.DateOffset, Timedelta)): @@ -765,13 +765,13 @@ cdef class _Period(object): return Period(ordinal=ordinal, freq=self.freq) else: # pragma: no cover return NotImplemented - elif isinstance(other, Period): + elif is_period_object(other): return other + self else: return NotImplemented def __sub__(self, other): - if isinstance(self, Period): + if is_period_object(self): if isinstance(other, (timedelta, np.timedelta64, offsets.DateOffset, Timedelta)): @@ -780,7 +780,7 @@ cdef class _Period(object): elif lib.is_integer(other): ordinal = self.ordinal - other * self.freq.n return Period(ordinal=ordinal, freq=self.freq) - elif isinstance(other, Period): + elif is_period_object(other): if other.freq != self.freq: msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) @@ -789,7 +789,7 @@ cdef class _Period(object): return -other.__sub__(self) else: # pragma: no cover return NotImplemented - elif isinstance(other, Period): + elif is_period_object(other): if self is NaT: return NaT return NotImplemented @@ -813,8 +813,8 @@ cdef class _Period(object): """ freq = self._maybe_convert_freq(freq) how = _validate_end_alias(how) - 
base1, mult1 = frequencies.get_freq_code(self.freq) - base2, mult2 = frequencies.get_freq_code(freq) + base1, mult1 = get_freq_code(self.freq) + base2, mult2 = get_freq_code(freq) # mult1 can't be negative or 0 end = how == 'E' @@ -860,17 +860,17 @@ cdef class _Period(object): how = _validate_end_alias(how) if freq is None: - base, mult = frequencies.get_freq_code(self.freq) + base, mult = get_freq_code(self.freq) freq = frequencies.get_to_timestamp_base(base) - base, mult = frequencies.get_freq_code(freq) + base, mult = get_freq_code(freq) val = self.asfreq(freq, how) dt64 = period_ordinal_to_dt64(val.ordinal, base) return Timestamp(dt64, tz=tz) cdef _field(self, alias): - base, mult = frequencies.get_freq_code(self.freq) + base, mult = get_freq_code(self.freq) return get_period_field(alias, self.ordinal, base) property year: @@ -935,7 +935,7 @@ cdef class _Period(object): return self.freq.freqstr def __repr__(self): - base, mult = frequencies.get_freq_code(self.freq) + base, mult = get_freq_code(self.freq) formatted = period_format(self.ordinal, base) return "Period('%s', '%s')" % (formatted, self.freqstr) @@ -946,7 +946,7 @@ cdef class _Period(object): Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. """ - base, mult = frequencies.get_freq_code(self.freq) + base, mult = get_freq_code(self.freq) formatted = period_format(self.ordinal, base) value = ("%s" % formatted) return value @@ -1096,7 +1096,7 @@ cdef class _Period(object): >>> a.strftime('%b. %d, %Y was a %A') 'Jan. 01, 2001 was a Monday' """ - base, mult = frequencies.get_freq_code(self.freq) + base, mult = get_freq_code(self.freq) return period_format(self.ordinal, base, fmt) @@ -1161,10 +1161,10 @@ class Period(_Period): ordinal = _ordinal_from_fields(year, month, quarter, day, hour, minute, second, freq) - elif isinstance(value, Period): + elif is_period_object(value): other = value - if freq is None or frequencies.get_freq_code( - freq) == frequencies.get_freq_code(other.freq): + if freq is None or get_freq_code( + freq) == get_freq_code(other.freq): ordinal = other.ordinal freq = other.freq else: @@ -1174,7 +1174,7 @@ class Period(_Period): elif is_null_datetimelike(value) or value in tslib._nat_strings: ordinal = iNaT - elif isinstance(value, compat.string_types) or lib.is_integer(value): + elif is_string_object(value) or lib.is_integer(value): if lib.is_integer(value): value = str(value) value = value.upper() @@ -1191,7 +1191,7 @@ class Period(_Period): dt = value if freq is None: raise ValueError('Must supply freq for datetime value') - elif isinstance(value, np.datetime64): + elif util.is_datetime64_object(value): dt = Timestamp(value) if freq is None: raise ValueError('Must supply freq for datetime value') @@ -1204,7 +1204,7 @@ class Period(_Period): raise ValueError(msg) if ordinal is None: - base, mult = frequencies.get_freq_code(freq) + base, mult = get_freq_code(freq) ordinal = get_period_ordinal(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, dt.microsecond, 0, base) @@ -1214,7 +1214,7 @@ class Period(_Period): def _ordinal_from_fields(year, month, quarter, day, hour, minute, second, freq): - base, mult = frequencies.get_freq_code(freq) + base, mult = get_freq_code(freq) if quarter is not None: year, month = _quarter_to_myear(year, quarter, freq) @@ -1227,8 +1227,7 @@ def _quarter_to_myear(year, quarter, freq): if quarter <= 0 or quarter > 4: raise ValueError('Quarter must be 1 <= q <= 4') - mnum = frequencies._month_numbers[ - frequencies._get_rule_month(freq)] + 1 + mnum = 
tslib._MONTH_NUMBERS[tslib._get_rule_month(freq)] + 1 month = (mnum + (quarter - 1) * 3) % 12 + 1 if month > mnum: year -= 1 diff --git a/pandas/_libs/tslibs/frequencies.pxd b/pandas/_libs/tslibs/frequencies.pxd new file mode 100644 index 0000000000000..974eb4ab45df0 --- /dev/null +++ b/pandas/_libs/tslibs/frequencies.pxd @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +# cython: profile=False + +cpdef get_freq_code(freqstr) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index 35429e8ae87f0..f7889d76abbc7 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -150,6 +150,9 @@ _period_code_map = { "N": 12000, # Nanosecondly } +_reverse_period_code_map = { + _period_code_map[key]: key for key in _period_code_map} + # Yearly aliases; careful not to put these in _reverse_period_code_map _period_code_map.update({'Y' + key[1:]: _period_code_map[key] for key in _period_code_map diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 6644a33245a84..085a3a784557b 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -20,7 +20,10 @@ from pandas._libs import lib, tslib from pandas._libs.tslib import Timedelta -from pandas._libs.tslibs.frequencies import get_freq_code, _base_and_stride +from pandas._libs.tslibs.frequencies import ( # noqa + get_freq_code, _base_and_stride, _period_str_to_code, + _INVALID_FREQ_ERROR, opattern, _lite_rule_alias, _dont_uppercase, + _period_code_map, _reverse_period_code_map) from pytz import AmbiguousTimeError @@ -375,27 +378,6 @@ def get_period_alias(offset_str): return _offset_to_period_map.get(offset_str, None) -_lite_rule_alias = { - 'W': 'W-SUN', - 'Q': 'Q-DEC', - - 'A': 'A-DEC', # YearEnd(month=12), - 'Y': 'A-DEC', - 'AS': 'AS-JAN', # YearBegin(month=1), - 'YS': 'AS-JAN', - 'BA': 'BA-DEC', # BYearEnd(month=12), - 'BY': 'BA-DEC', - 'BAS': 'BAS-JAN', # BYearBegin(month=1), - 'BYS': 'BAS-JAN', - - 'Min': 'T', - 'min': 'T', - 'ms': 'L', - 'us': 'U', - 'ns': 'N' -} - - _name_to_offset_map = {'days': Day(1), 'hours': Hour(1), 'minutes': Minute(1), @@ -405,9 +387,6 @@ def get_period_alias(offset_str): 'nanoseconds': Nano(1)} -_INVALID_FREQ_ERROR = "Invalid frequency: {0}" - - @deprecate_kwarg(old_arg_name='freqstr', new_arg_name='freq') def to_offset(freq): """ @@ -519,12 +498,6 @@ def to_offset(freq): return delta -# hack to handle WOM-1MON -opattern = re.compile( - r'([\-]?\d*|[\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)' -) - - def get_base_alias(freqstr): """ Returns the base frequency alias, e.g., '5D' -> 'D' @@ -532,9 +505,6 @@ def get_base_alias(freqstr): return _base_and_stride(freqstr)[0] -_dont_uppercase = set(('MS', 'ms')) - - def get_offset(name): """ Return DateOffset object associated with rule name @@ -583,96 +553,6 @@ def get_standard_freq(freq): # --------------------------------------------------------------------- # Period codes -# period frequency constants corresponding to scikits timeseries -# originals -_period_code_map = { - # Annual freqs with various fiscal year ends. 
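
The 'Y' yearly aliases built with a dict comprehension in the new frequencies.pyx replace a plain loop that is removed from pandas/tseries/frequencies.py below. A short self-contained sketch showing the two forms agree, on a trimmed-down map used purely for illustration:

    codes = {"A-DEC": 1000, "A-JAN": 1001, "M": 3000}

    # loop form, as previously written in pandas/tseries/frequencies.py
    year_aliases = {}
    for k, v in codes.items():
        if k.startswith("A-"):
            year_aliases["Y" + k[1:]] = v

    # comprehension form, as now written in frequencies.pyx
    assert year_aliases == {'Y' + k[1:]: codes[k]
                            for k in codes if k.startswith('A-')}
    assert year_aliases == {"Y-DEC": 1000, "Y-JAN": 1001}
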
- # eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005 - "A-DEC": 1000, # Annual - December year end - "A-JAN": 1001, # Annual - January year end - "A-FEB": 1002, # Annual - February year end - "A-MAR": 1003, # Annual - March year end - "A-APR": 1004, # Annual - April year end - "A-MAY": 1005, # Annual - May year end - "A-JUN": 1006, # Annual - June year end - "A-JUL": 1007, # Annual - July year end - "A-AUG": 1008, # Annual - August year end - "A-SEP": 1009, # Annual - September year end - "A-OCT": 1010, # Annual - October year end - "A-NOV": 1011, # Annual - November year end - - # Quarterly frequencies with various fiscal year ends. - # eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005 - "Q-DEC": 2000, # Quarterly - December year end - "Q-JAN": 2001, # Quarterly - January year end - "Q-FEB": 2002, # Quarterly - February year end - "Q-MAR": 2003, # Quarterly - March year end - "Q-APR": 2004, # Quarterly - April year end - "Q-MAY": 2005, # Quarterly - May year end - "Q-JUN": 2006, # Quarterly - June year end - "Q-JUL": 2007, # Quarterly - July year end - "Q-AUG": 2008, # Quarterly - August year end - "Q-SEP": 2009, # Quarterly - September year end - "Q-OCT": 2010, # Quarterly - October year end - "Q-NOV": 2011, # Quarterly - November year end - - "M": 3000, # Monthly - - "W-SUN": 4000, # Weekly - Sunday end of week - "W-MON": 4001, # Weekly - Monday end of week - "W-TUE": 4002, # Weekly - Tuesday end of week - "W-WED": 4003, # Weekly - Wednesday end of week - "W-THU": 4004, # Weekly - Thursday end of week - "W-FRI": 4005, # Weekly - Friday end of week - "W-SAT": 4006, # Weekly - Saturday end of week - - "B": 5000, # Business days - "D": 6000, # Daily - "H": 7000, # Hourly - "T": 8000, # Minutely - "S": 9000, # Secondly - "L": 10000, # Millisecondly - "U": 11000, # Microsecondly - "N": 12000, # Nanosecondly -} - -_reverse_period_code_map = {} -for _k, _v in compat.iteritems(_period_code_map): - _reverse_period_code_map[_v] = _k - -# Yearly aliases -year_aliases = {} - -for k, v in compat.iteritems(_period_code_map): - if k.startswith("A-"): - alias = "Y" + k[1:] - year_aliases[alias] = v - -_period_code_map.update(**year_aliases) -del year_aliases - -_period_code_map.update({ - "Q": 2000, # Quarterly - December year end (default quarterly) - "A": 1000, # Annual - "W": 4000, # Weekly - "C": 5000, # Custom Business Day -}) - - -def _period_str_to_code(freqstr): - freqstr = _lite_rule_alias.get(freqstr, freqstr) - - if freqstr not in _dont_uppercase: - lower = freqstr.lower() - freqstr = _lite_rule_alias.get(lower, freqstr) - - if freqstr not in _dont_uppercase: - freqstr = freqstr.upper() - try: - return _period_code_map[freqstr] - except KeyError: - raise ValueError(_INVALID_FREQ_ERROR.format(freqstr)) - def infer_freq(index, warn=True): """ From fdbc6b8f4b36f07da62fc901b19754f922ae3952 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 9 Sep 2017 12:09:08 -0700 Subject: [PATCH 28/57] MAINT: calcurate --> calculate in _doctools.py --- pandas/util/_doctools.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/util/_doctools.py b/pandas/util/_doctools.py index cbc9518b96416..d654c78b8b13f 100644 --- a/pandas/util/_doctools.py +++ b/pandas/util/_doctools.py @@ -15,12 +15,18 @@ def __init__(self, cell_width=0.37, cell_height=0.25, font_size=7.5): self.font_size = font_size def _shape(self, df): - """Calcurate table chape considering index levels""" + """ + Calculate table chape considering index levels. 
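
The _shape helper here sizes a drawn table as the data shape plus one row per column level and one column per index level. A small usage sketch with the public pandas API (the frame is illustrative):

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})

    # 2 data rows + 1 header row, 2 data columns + 1 index column
    nrows, ncols = df.shape
    assert (nrows + df.columns.nlevels,
            ncols + df.index.nlevels) == (3, 3)
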
+ """ + row, col = df.shape return row + df.columns.nlevels, col + df.index.nlevels def _get_cells(self, left, right, vertical): - """Calcurate appropriate figure size based on left and right data""" + """ + Calculate appropriate figure size based on left and right data. + """ + if vertical: # calcurate required number of cells vcells = max(sum([self._shape(l)[0] for l in left]), From 23050dca1b404d23527132c0277f3d40dc41cab8 Mon Sep 17 00:00:00 2001 From: Matt Bark Date: Sun, 10 Sep 2017 03:30:48 -0400 Subject: [PATCH 29/57] BUG: Fix TypeError caused by GH13374 (#17465) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/parsers.py | 4 +++- pandas/tests/io/parser/python_parser_only.py | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f50052347cfb5..bfe7d974a6097 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -411,6 +411,7 @@ I/O - Bug in :func:`read_csv` when called with a single-element list ``header`` would return a ``DataFrame`` of all NaN values (:issue:`7757`) - Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) - Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) +- Bug in :func:`read_csv` where automatic delimiter detection caused a ``TypeError`` to be thrown when a bad line was encountered rather than the correct error message (:issue:`13374`) Plotting ^^^^^^^^ diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 6adf154aabba7..d9e83176d0d6e 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2836,7 +2836,9 @@ def _rows_to_cols(self, content): for row_num, actual_len in bad_lines: msg = ('Expected %d fields in line %d, saw %d' % (col_len, row_num + 1, actual_len)) - if len(self.delimiter) > 1 and self.quoting != csv.QUOTE_NONE: + if (self.delimiter and + len(self.delimiter) > 1 and + self.quoting != csv.QUOTE_NONE): # see gh-13374 reason = ('Error could possibly be due to quotes being ' 'ignored when a multi-char delimiter is used.') diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py index a0784d3aeae2d..c3dc91b3f188c 100644 --- a/pandas/tests/io/parser/python_parser_only.py +++ b/pandas/tests/io/parser/python_parser_only.py @@ -218,6 +218,25 @@ def test_multi_char_sep_quotes(self): self.read_csv(StringIO(data), sep=',,', quoting=csv.QUOTE_NONE) + def test_none_delimiter(self): + # see gh-13374 and gh-17465 + + data = "a,b,c\n0,1,2\n3,4,5,6\n7,8,9" + expected = DataFrame({'a': [0, 7], + 'b': [1, 8], + 'c': [2, 9]}) + + # We expect the third line in the data to be + # skipped because it is malformed, + # but we do not expect any errors to occur. 
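
The new test exercises read_csv with sep=None, which makes the Python engine sniff the delimiter; combined with error_bad_lines=False, a malformed row is skipped rather than raising. A rough standalone equivalent using the public API (the StringIO import path is the 0.21-era one):

    import pandas as pd
    from pandas.compat import StringIO

    data = "a,b,c\n0,1,2\n3,4,5,6\n7,8,9"

    # the third data row has four fields and is dropped, not raised on
    result = pd.read_csv(StringIO(data), sep=None, engine='python',
                         error_bad_lines=False, warn_bad_lines=True)
    assert list(result['a']) == [0, 7]
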
+ result = self.read_csv(StringIO(data), header=0, + sep=None, + error_bad_lines=False, + warn_bad_lines=True, + engine='python', + tupleize_cols=True) + tm.assert_frame_equal(result, expected) + def test_skipfooter_bad_row(self): # see gh-13879 # see gh-15910 From c3ad501ed31e2e71ab91a201ed72779fdd597698 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 10 Sep 2017 07:19:52 -0700 Subject: [PATCH 30/57] Remove incorrect kwds from DateOffset tests (#17486) --- pandas/tests/tseries/test_offsets.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index e03b3e0a85e5e..7e6e85f322fe0 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -111,7 +111,10 @@ def offset_types(self): def _get_offset(self, klass, value=1, normalize=False): # create instance from offset class - if klass is FY5253 or klass is FY5253Quarter: + if klass is FY5253: + klass = klass(n=value, startingMonth=1, weekday=1, + variation='last', normalize=normalize) + elif klass is FY5253Quarter: klass = klass(n=value, startingMonth=1, weekday=1, qtr_with_extra_week=1, variation='last', normalize=normalize) @@ -2629,7 +2632,7 @@ def test_offset(self): def test_day_of_month(self): dt = datetime(2007, 1, 1) - offset = MonthEnd(day=20) + offset = MonthEnd() result = dt + offset assert result == Timestamp(2007, 1, 31) @@ -3678,7 +3681,7 @@ def test_onOffset(self): 1, startingMonth=8, weekday=WeekDay.THU, qtr_with_extra_week=4) offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, - variation="nearest", qtr_with_extra_week=4) + variation="nearest") tests = [ # From Wikipedia From e6aed2ebb7374ed2a6a7c284750d47728aec285e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 10 Sep 2017 07:43:37 -0700 Subject: [PATCH 31/57] Remove pyx dependencies from setup (#17478) --- setup.py | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/setup.py b/setup.py index 3269fe7972cf0..d64a78db7500a 100755 --- a/setup.py +++ b/setup.py @@ -347,14 +347,6 @@ class CheckSDist(sdist_class): def initialize_options(self): sdist_class.initialize_options(self) - ''' - self._pyxfiles = [] - for root, dirs, files in os.walk('pandas'): - for f in files: - if f.endswith('.pyx'): - self._pyxfiles.append(pjoin(root, f)) - ''' - def run(self): if 'cython' in cmdclass: self.run_command('cython') @@ -479,11 +471,10 @@ def pxd(name): '_libs.lib': {'pyxfile': '_libs/lib', 'depends': lib_depends + tseries_depends}, '_libs.hashtable': {'pyxfile': '_libs/hashtable', - 'pxdfiles': ['_libs/hashtable'], 'depends': (['pandas/_libs/src/klib/khash_python.h'] + _pxi_dep['hashtable'])}, '_libs.tslib': {'pyxfile': '_libs/tslib', - 'pxdfiles': ['_libs/src/util', '_libs/lib'], + 'pxdfiles': ['_libs/src/util'], 'depends': tseries_depends, 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c', @@ -498,21 +489,20 @@ def pxd(name): '_libs.index': {'pyxfile': '_libs/index', 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c'], - 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], + 'pxdfiles': ['_libs/src/util'], 'depends': _pxi_dep['index']}, '_libs.algos': {'pyxfile': '_libs/algos', - 'pxdfiles': ['_libs/src/util', '_libs/algos', '_libs/hashtable'], + 'pxdfiles': ['_libs/src/util'], 'depends': _pxi_dep['algos']}, '_libs.groupby': {'pyxfile': '_libs/groupby', - 'pxdfiles': ['_libs/src/util', 
'_libs/algos'], - 'depends': _pxi_dep['groupby']}, + 'pxdfiles': ['_libs/src/util'], + 'depends': _pxi_dep['groupby']}, '_libs.join': {'pyxfile': '_libs/join', - 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], + 'pxdfiles': ['_libs/src/util'], 'depends': _pxi_dep['join']}, '_libs.reshape': {'pyxfile': '_libs/reshape', 'depends': _pxi_dep['reshape']}, '_libs.interval': {'pyxfile': '_libs/interval', - 'pxdfiles': ['_libs/hashtable'], 'depends': _pxi_dep['interval']}, '_libs.window': {'pyxfile': '_libs/window', 'pxdfiles': ['_libs/src/skiplist', '_libs/src/util'], @@ -525,12 +515,9 @@ def pxd(name): 'sources': ['pandas/_libs/src/parser/tokenizer.c', 'pandas/_libs/src/parser/io.c']}, '_libs.sparse': {'pyxfile': '_libs/sparse', - 'depends': (['pandas/_libs/sparse.pyx'] + - _pxi_dep['sparse'])}, - '_libs.testing': {'pyxfile': '_libs/testing', - 'depends': ['pandas/_libs/testing.pyx']}, - '_libs.hashing': {'pyxfile': '_libs/hashing', - 'depends': ['pandas/_libs/hashing.pyx']}, + 'depends': _pxi_dep['sparse']}, + '_libs.testing': {'pyxfile': '_libs/testing'}, + '_libs.hashing': {'pyxfile': '_libs/hashing'}, 'io.sas._sas': {'pyxfile': 'io/sas/sas'}, } From 42ed4f143f8b0b386c90df9fa8a55d0f2e5a857c Mon Sep 17 00:00:00 2001 From: Licht Takeuchi Date: Mon, 11 Sep 2017 09:01:41 +0900 Subject: [PATCH 32/57] ENH: Add Styler.where (#17474) --- doc/source/api.rst | 1 + doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/formats/style.py | 42 +++++++++++++++++++ pandas/tests/io/formats/test_style.py | 58 +++++++++++++++++++++++++++ 4 files changed, 102 insertions(+) diff --git a/doc/source/api.rst b/doc/source/api.rst index c32a541d19605..27a4ab9cc6cbc 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -2062,6 +2062,7 @@ Style Application Styler.apply Styler.applymap + Styler.where Styler.format Styler.set_precision Styler.set_table_styles diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index bfe7d974a6097..eccd71f45ec27 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -112,6 +112,7 @@ Other Enhancements - `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`). - :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`). - :func:`DataFrame.items` and :func:`Series.items` is now present in both Python 2 and 3 and is lazy in all cases (:issue:`13918`, :issue:`17213`) +- :func:`Styler.where` has been implemented. It is as a convenience for :func:`Styler.applymap` and enables simple DataFrame styling on the Jupyter notebook (:issue:`17474`). diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 87d672197be30..d7677e3642c26 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -618,11 +618,53 @@ def applymap(self, func, subset=None, **kwargs): ------- self : Styler + See Also + -------- + Styler.where + """ self._todo.append((lambda instance: getattr(instance, '_applymap'), (func, subset), kwargs)) return self + def where(self, cond, value, other=None, subset=None, **kwargs): + """ + Apply a function elementwise, updating the HTML + representation with a style which is selected in + accordance with the return value of a function. + + .. 
versionadded:: 0.21.0 + + Parameters + ---------- + cond : callable + ``cond`` should take a scalar and return a boolean + value : str + applied when ``cond`` returns true + other : str + applied when ``cond`` returns false + subset : IndexSlice + a valid indexer to limit ``data`` to *before* applying the + function. Consider using a pandas.IndexSlice + kwargs : dict + pass along to ``cond`` + + Returns + ------- + self : Styler + + See Also + -------- + Styler.applymap + + """ + + if other is None: + other = '' + + return self.applymap(lambda val: value if cond(val) else other, + subset=subset, **kwargs) + def set_precision(self, precision): """ Set the precision used to render. diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 59d9f938734ab..811381e4cbd2a 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -265,6 +265,64 @@ def f(x): col in self.df.loc[slice_].columns) assert result == expected + def test_where_with_one_style(self): + # GH 17474 + def f(x): + return x > 0.5 + + style1 = 'foo: bar' + + result = self.df.style.where(f, style1)._compute().ctx + expected = dict(((r, c), + [style1 if f(self.df.loc[row, col]) else '']) + for r, row in enumerate(self.df.index) + for c, col in enumerate(self.df.columns)) + assert result == expected + + def test_where_subset(self): + # GH 17474 + def f(x): + return x > 0.5 + + style1 = 'foo: bar' + style2 = 'baz: foo' + + slices = [pd.IndexSlice[:], pd.IndexSlice[:, ['A']], + pd.IndexSlice[[1], :], pd.IndexSlice[[1], ['A']], + pd.IndexSlice[:2, ['A', 'B']]] + + for slice_ in slices: + result = self.df.style.where(f, style1, style2, + subset=slice_)._compute().ctx + expected = dict(((r, c), + [style1 if f(self.df.loc[row, col]) else style2]) + for r, row in enumerate(self.df.index) + for c, col in enumerate(self.df.columns) + if row in self.df.loc[slice_].index and + col in self.df.loc[slice_].columns) + assert result == expected + + def test_where_subset_compare_with_applymap(self): + # GH 17474 + def f(x): + return x > 0.5 + + style1 = 'foo: bar' + style2 = 'baz: foo' + + def g(x): + return style1 if f(x) else style2 + + slices = [pd.IndexSlice[:], pd.IndexSlice[:, ['A']], + pd.IndexSlice[[1], :], pd.IndexSlice[[1], ['A']], + pd.IndexSlice[:2, ['A', 'B']]] + + for slice_ in slices: + result = self.df.style.where(f, style1, style2, + subset=slice_)._compute().ctx + expected = self.df.style.applymap(g, subset=slice_)._compute().ctx + assert result == expected + def test_empty(self): df = pd.DataFrame({'A': [1, 0]}) s = df.style From f3b6d1f91643d245d6b43b41e7c9fd1349fb8de5 Mon Sep 17 00:00:00 2001 From: rvernica Date: Mon, 11 Sep 2017 04:03:18 -0700 Subject: [PATCH 33/57] Add file-like object to docs (#17492) --- pandas/io/feather_format.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 87a4931421d7d..b2bf4ab7ff7f1 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -41,8 +41,7 @@ def to_feather(df, path): Parameters ---------- df : DataFrame - path : string - File path + path : string file path, or file-like object """ path = _stringify_path(path) @@ -92,8 +91,7 @@ def read_feather(path, nthreads=1): Parameters ---------- - path : string - File path + path : string file path, or file-like object nthreads : int, default 1 Number of CPU threads to use when reading to pandas.DataFrame From 46856c3936540a47df719d10a7699eb35673e4a4 Mon Sep 17 
00:00:00 2001 From: jbrockmendel Date: Mon, 11 Sep 2017 04:22:56 -0700 Subject: [PATCH 34/57] Implement _is_utc in timezones (#17419) --- pandas/_libs/index.pyx | 7 +------ pandas/_libs/period.pyx | 2 +- pandas/_libs/tslib.pxd | 1 - pandas/_libs/tslib.pyx | 4 ++-- pandas/_libs/tslibs/__init__.py | 2 ++ pandas/_libs/tslibs/timezones.pxd | 4 ++++ pandas/_libs/tslibs/timezones.pyx | 12 ++++++++++++ setup.py | 2 ++ 8 files changed, 24 insertions(+), 10 deletions(-) create mode 100644 pandas/_libs/tslibs/timezones.pxd create mode 100644 pandas/_libs/tslibs/timezones.pyx diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 42ba0c1cadaec..bf4d53683c9b7 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -17,6 +17,7 @@ cimport tslib from hashtable cimport HashTable +from tslibs.timezones cimport _is_utc from pandas._libs import tslib, algos, hashtable as _hash from pandas._libs.tslib import Timestamp, Timedelta from datetime import datetime, timedelta @@ -32,9 +33,6 @@ cdef extern from "datetime.h": cdef int64_t iNaT = util.get_nat() -from dateutil.tz import tzutc as _du_utc -import pytz -UTC = pytz.utc PyDateTime_IMPORT @@ -559,9 +557,6 @@ cdef inline _to_i8(object val): return ival return val -cdef inline bint _is_utc(object tz): - return tz is UTC or isinstance(tz, _du_utc) - cdef class MultiIndexObjectEngine(ObjectEngine): """ diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index e2a3baa8d6e8b..08962bca824ca 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -34,9 +34,9 @@ from lib cimport is_null_datetimelike, is_period from pandas._libs import tslib, lib from pandas._libs.tslib import (Timedelta, Timestamp, iNaT, NaT, _get_utcoffset) +from tslibs.timezones cimport _is_utc from tslib cimport ( maybe_get_tz, - _is_utc, _is_tzlocal, _get_dst_info, _nat_scalar_rules) diff --git a/pandas/_libs/tslib.pxd b/pandas/_libs/tslib.pxd index aa8cbcb2cedc7..1d81c3cc15cd8 100644 --- a/pandas/_libs/tslib.pxd +++ b/pandas/_libs/tslib.pxd @@ -3,7 +3,6 @@ from numpy cimport ndarray, int64_t cdef convert_to_tsobject(object, object, object, bint, bint) cpdef convert_to_timedelta64(object, object) cpdef object maybe_get_tz(object) -cdef bint _is_utc(object) cdef bint _is_tzlocal(object) cdef object _get_dst_info(object) cdef bint _nat_scalar_rules[6] diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 7e009652f7f0c..b1f794a0030d1 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -107,6 +107,8 @@ cdef int64_t NPY_NAT = util.get_nat() iNaT = NPY_NAT +from tslibs.timezones cimport _is_utc + cdef inline object create_timestamp_from_ts( int64_t value, pandas_datetimestruct dts, object tz, object freq): @@ -1713,8 +1715,6 @@ def _localize_pydatetime(object dt, object tz): def get_timezone(tz): return _get_zone(tz) -cdef inline bint _is_utc(object tz): - return tz is UTC or isinstance(tz, _dateutil_tzutc) cdef inline object _get_zone(object tz): """ diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index e69de29bb2d1d..f3aa0424f0376 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -0,0 +1,2 @@ +# -*- coding: utf-8 -*- +# cython: profile=False diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd new file mode 100644 index 0000000000000..0708282abe1d0 --- /dev/null +++ b/pandas/_libs/tslibs/timezones.pxd @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +# cython: profile=False + +cdef bint _is_utc(object tz) diff --git 
a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx new file mode 100644 index 0000000000000..43709e77b70d5 --- /dev/null +++ b/pandas/_libs/tslibs/timezones.pyx @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +# cython: profile=False + +# dateutil compat +from dateutil.tz import tzutc as _dateutil_tzutc + +import pytz +UTC = pytz.utc + + +cdef inline bint _is_utc(object tz): + return tz is UTC or isinstance(tz, _dateutil_tzutc) diff --git a/setup.py b/setup.py index d64a78db7500a..434ca64473916 100755 --- a/setup.py +++ b/setup.py @@ -341,6 +341,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/window.pyx', 'pandas/_libs/sparse.pyx', 'pandas/_libs/parsers.pyx', + 'pandas/_libs/tslibs/timezones.pyx', 'pandas/_libs/tslibs/frequencies.pyx', 'pandas/io/sas/sas.pyx'] @@ -479,6 +480,7 @@ def pxd(name): 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c', 'pandas/_libs/src/period_helper.c']}, + '_libs.tslibs.timezones': {'pyxfile': '_libs/tslibs/timezones'}, '_libs.period': {'pyxfile': '_libs/period', 'depends': tseries_depends, 'sources': ['pandas/_libs/src/datetime/np_datetime.c', From 34cc2e812f60687d2a4417ff26fc180f7c042674 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 12 Sep 2017 03:09:50 -0700 Subject: [PATCH 35/57] Follow-up to #17419 (#17497) --- pandas/_libs/period.pyx | 5 +-- pandas/_libs/src/inference.pyx | 7 ++-- pandas/_libs/tslib.pxd | 1 - pandas/_libs/tslib.pyx | 66 ++++-------------------------- pandas/_libs/tslibs/timezones.pxd | 8 ++++ pandas/_libs/tslibs/timezones.pyx | 68 ++++++++++++++++++++++++++++++- 6 files changed, 88 insertions(+), 67 deletions(-) diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 08962bca824ca..2b0734f5cf2e7 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -33,11 +33,10 @@ from util cimport is_period_object, is_string_object from lib cimport is_null_datetimelike, is_period from pandas._libs import tslib, lib from pandas._libs.tslib import (Timedelta, Timestamp, iNaT, - NaT, _get_utcoffset) -from tslibs.timezones cimport _is_utc + NaT) +from tslibs.timezones cimport _is_utc, _is_tzlocal, _get_utcoffset from tslib cimport ( maybe_get_tz, - _is_tzlocal, _get_dst_info, _nat_scalar_rules) diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index 6b5a8f20f0067..95145ff49b02f 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -2,7 +2,8 @@ import sys from decimal import Decimal cimport util cimport cython -from tslib import NaT, get_timezone +from tslib import NaT +from tslibs.timezones cimport _get_zone from datetime import datetime, timedelta iNaT = util.get_nat() @@ -900,13 +901,13 @@ cpdef bint is_datetime_with_singletz_array(ndarray[object] values): for i in range(n): base_val = values[i] if base_val is not NaT: - base_tz = get_timezone(getattr(base_val, 'tzinfo', None)) + base_tz = _get_zone(getattr(base_val, 'tzinfo', None)) for j in range(i, n): val = values[j] if val is not NaT: tz = getattr(val, 'tzinfo', None) - if base_tz != tz and base_tz != get_timezone(tz): + if base_tz != tz and base_tz != _get_zone(tz): return False break diff --git a/pandas/_libs/tslib.pxd b/pandas/_libs/tslib.pxd index 1d81c3cc15cd8..c1b25963a6257 100644 --- a/pandas/_libs/tslib.pxd +++ b/pandas/_libs/tslib.pxd @@ -3,7 +3,6 @@ from numpy cimport ndarray, int64_t cdef convert_to_tsobject(object, object, object, bint, bint) cpdef convert_to_timedelta64(object, object) cpdef object maybe_get_tz(object) -cdef 
bint _is_tzlocal(object) cdef object _get_dst_info(object) cdef bint _nat_scalar_rules[6] cdef bint _check_all_nulls(obj) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b1f794a0030d1..a8ae0fcd733d6 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -107,7 +107,13 @@ cdef int64_t NPY_NAT = util.get_nat() iNaT = NPY_NAT -from tslibs.timezones cimport _is_utc +from tslibs.timezones cimport ( + _is_utc, _is_tzlocal, + _treat_tz_as_dateutil, _treat_tz_as_pytz, + _get_zone, + _get_utcoffset) +from tslibs.timezones import get_timezone, _get_utcoffset # noqa + cdef inline object create_timestamp_from_ts( int64_t value, pandas_datetimestruct dts, @@ -235,10 +241,6 @@ def ints_to_pytimedelta(ndarray[int64_t] arr, box=False): return result -cdef inline bint _is_tzlocal(object tz): - return isinstance(tz, _dateutil_tzlocal) - - cdef inline bint _is_fixed_offset(object tz): if _treat_tz_as_dateutil(tz): if len(tz._trans_idx) == 0 and len(tz._trans_list) == 0: @@ -1443,11 +1445,6 @@ cdef class _TSObject: def __get__(self): return self.value -cpdef _get_utcoffset(tzinfo, obj): - try: - return tzinfo._utcoffset - except AttributeError: - return tzinfo.utcoffset(obj) # helper to extract datetime and int64 from several different possibilities cdef convert_to_tsobject(object ts, object tz, object unit, @@ -1712,48 +1709,6 @@ def _localize_pydatetime(object dt, object tz): return dt.replace(tzinfo=tz) -def get_timezone(tz): - return _get_zone(tz) - - -cdef inline object _get_zone(object tz): - """ - We need to do several things here: - 1) Distinguish between pytz and dateutil timezones - 2) Not be over-specific (e.g. US/Eastern with/without DST is same *zone* - but a different tz object) - 3) Provide something to serialize when we're storing a datetime object - in pytables. - - We return a string prefaced with dateutil if it's a dateutil tz, else just - the tz name. It needs to be a string so that we can serialize it with - UJSON/pytables. maybe_get_tz (below) is the inverse of this process. - """ - if _is_utc(tz): - return 'UTC' - else: - if _treat_tz_as_dateutil(tz): - if '.tar.gz' in tz._filename: - raise ValueError( - 'Bad tz filename. Dateutil on python 3 on windows has a ' - 'bug which causes tzfile._filename to be the same for all ' - 'timezone files. Please construct dateutil timezones ' - 'implicitly by passing a string like "dateutil/Europe' - '/London" when you construct your pandas objects instead ' - 'of passing a timezone object. See ' - 'https://github.com/pandas-dev/pandas/pull/7362') - return 'dateutil/' + tz._filename - else: - # tz is a pytz timezone or unknown. - try: - zone = tz.zone - if zone is None: - return tz - return zone - except AttributeError: - return tz - - cpdef inline object maybe_get_tz(object tz): """ (Maybe) Construct a timezone object from a string. If tz is a string, use @@ -4285,13 +4240,6 @@ def tz_convert_single(int64_t val, object tz1, object tz2): # Timezone data caches, key is the pytz string or dateutil file name. 
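
The pytz/dateutil distinction used throughout this code is pure duck typing on private attributes, exactly as in the cdef helpers shown in this patch. A pure-Python sketch of the two checks, assuming pytz and dateutil are installed and expose these attributes:

    import pytz
    from dateutil import tz as dtz

    def treat_tz_as_pytz(tzinfo):
        # pytz zones carry precomputed transition tables
        return (hasattr(tzinfo, '_utc_transition_times') and
                hasattr(tzinfo, '_transition_info'))

    def treat_tz_as_dateutil(tzinfo):
        # dateutil tzfile objects carry transition lists instead
        return (hasattr(tzinfo, '_trans_list') and
                hasattr(tzinfo, '_trans_idx'))

    assert treat_tz_as_pytz(pytz.timezone('US/Eastern'))
    assert treat_tz_as_dateutil(dtz.gettz('US/Eastern'))
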
dst_cache = {} -cdef inline bint _treat_tz_as_pytz(object tz): - return hasattr(tz, '_utc_transition_times') and hasattr( - tz, '_transition_info') - -cdef inline bint _treat_tz_as_dateutil(object tz): - return hasattr(tz, '_trans_list') and hasattr(tz, '_trans_idx') - def _p_tz_cache_key(tz): """ Python interface for cache function to facilitate testing.""" diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 0708282abe1d0..897bd8af7e2de 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -2,3 +2,11 @@ # cython: profile=False cdef bint _is_utc(object tz) +cdef bint _is_tzlocal(object tz) + +cdef bint _treat_tz_as_pytz(object tz) +cdef bint _treat_tz_as_dateutil(object tz) + +cdef object _get_zone(object tz) + +cpdef _get_utcoffset(tzinfo, obj) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 43709e77b70d5..249eedef4bb09 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -2,7 +2,9 @@ # cython: profile=False # dateutil compat -from dateutil.tz import tzutc as _dateutil_tzutc +from dateutil.tz import ( + tzutc as _dateutil_tzutc, + tzlocal as _dateutil_tzlocal) import pytz UTC = pytz.utc @@ -10,3 +12,67 @@ UTC = pytz.utc cdef inline bint _is_utc(object tz): return tz is UTC or isinstance(tz, _dateutil_tzutc) + + +cdef inline bint _is_tzlocal(object tz): + return isinstance(tz, _dateutil_tzlocal) + + +cdef inline bint _treat_tz_as_pytz(object tz): + return hasattr(tz, '_utc_transition_times') and hasattr( + tz, '_transition_info') + + +cdef inline bint _treat_tz_as_dateutil(object tz): + return hasattr(tz, '_trans_list') and hasattr(tz, '_trans_idx') + + +cdef inline object _get_zone(object tz): + """ + We need to do several things here: + 1) Distinguish between pytz and dateutil timezones + 2) Not be over-specific (e.g. US/Eastern with/without DST is same *zone* + but a different tz object) + 3) Provide something to serialize when we're storing a datetime object + in pytables. + + We return a string prefaced with dateutil if it's a dateutil tz, else just + the tz name. It needs to be a string so that we can serialize it with + UJSON/pytables. maybe_get_tz (below) is the inverse of this process. + """ + if _is_utc(tz): + return 'UTC' + else: + if _treat_tz_as_dateutil(tz): + if '.tar.gz' in tz._filename: + raise ValueError( + 'Bad tz filename. Dateutil on python 3 on windows has a ' + 'bug which causes tzfile._filename to be the same for all ' + 'timezone files. Please construct dateutil timezones ' + 'implicitly by passing a string like "dateutil/Europe' + '/London" when you construct your pandas objects instead ' + 'of passing a timezone object. See ' + 'https://github.com/pandas-dev/pandas/pull/7362') + return 'dateutil/' + tz._filename + else: + # tz is a pytz timezone or unknown. 
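
_get_zone normalizes a tzinfo to something serializable: 'UTC', a 'dateutil/'-prefixed filename, or the pytz zone name. A simplified sketch of that dispatch, dropping the Windows tarball guard, and assuming a dateutil zone exposes _filename as above:

    import pytz
    from dateutil import tz as dtz

    def get_zone_name(tzinfo):
        # dateutil zones record the tz database file they were read from
        filename = getattr(tzinfo, '_filename', None)
        if filename is not None:
            return 'dateutil/' + filename
        # pytz zones expose their name; fall back to the object itself
        return getattr(tzinfo, 'zone', tzinfo)

    assert get_zone_name(pytz.timezone('Europe/London')) == 'Europe/London'
    assert get_zone_name(
        dtz.gettz('Europe/London')).startswith('dateutil/')
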
+ try: + zone = tz.zone + if zone is None: + return tz + return zone + except AttributeError: + return tz + + +def get_timezone(tz): + return _get_zone(tz) + +#---------------------------------------------------------------------- +# UTC Offsets + +cpdef _get_utcoffset(tzinfo, obj): + try: + return tzinfo._utcoffset + except AttributeError: + return tzinfo.utcoffset(obj) From 9a8427404efb3df5deda12f76352725d628adf5e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 12 Sep 2017 06:26:02 -0400 Subject: [PATCH 36/57] DOC: fix parquet example to not use ns --- doc/source/io.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index de3150035c446..8fbb23769492e 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4515,8 +4515,7 @@ See the documentation for `pyarrow `__ and 'd': np.arange(4.0, 7.0, dtype='float64'), 'e': [True, False, True], 'f': pd.date_range('20130101', periods=3), - 'g': pd.date_range('20130101', periods=3, tz='US/Eastern'), - 'h': pd.date_range('20130101', periods=3, freq='ns')}) + 'g': pd.date_range('20130101', periods=3, tz='US/Eastern')}) df df.dtypes From d46b027e793e0f7b03a9372b82ac68cd35c1f35f Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Tue, 12 Sep 2017 19:31:32 +0900 Subject: [PATCH 37/57] Prevent UnicodeDecodeError in pivot_table under Py2 (#17489) --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/reshape/pivot.py | 2 +- pandas/tests/reshape/test_pivot.py | 10 ++++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9d475390175b2..fe24f8f499172 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1705,6 +1705,7 @@ Reshaping - Bug in ``pd.concat()`` in which concatenating with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) - Bug with ``sort=True`` in ``DataFrame.join`` and ``pd.merge`` when joining on indexes (:issue:`15582`) - Bug in ``DataFrame.nsmallest`` and ``DataFrame.nlargest`` where identical values resulted in duplicated rows (:issue:`15297`) +- Bug in :func:`pandas.pivot_table` incorrectly raising ``UnicodeError`` when passing unicode input for ```margins`` keyword (:issue:`13292`) Numeric ^^^^^^^ diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index f07123ca18489..d19de6030d473 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -145,7 +145,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc, if not isinstance(margins_name, compat.string_types): raise ValueError('margins_name argument must be a string') - msg = 'Conflicting name "{name}" in margins'.format(name=margins_name) + msg = u'Conflicting name "{name}" in margins'.format(name=margins_name) for level in table.index.names: if margins_name in table.index.get_level_values(level): raise ValueError(msg) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 879ac96680fbb..bd8a999ce2330 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1625,3 +1625,13 @@ def test_isleapyear_deprecate(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): assert isleapyear(2004) + + def test_pivot_margins_name_unicode(self): + # issue #13292 + greek = u'\u0394\u03bf\u03ba\u03b9\u03bc\u03ae' + frame = pd.DataFrame({'foo': [1, 2, 3]}) + table = pd.pivot_table(frame, index=['foo'], aggfunc=len, margins=True, + 
margins_name=greek) + index = pd.Index([1, 2, 3, greek], dtype='object', name='foo') + expected = pd.DataFrame(index=index) + tm.assert_frame_equal(table, expected) From e682902327bd883a207b291b0326f277b3dcdd12 Mon Sep 17 00:00:00 2001 From: T N Date: Tue, 12 Sep 2017 19:35:55 +0900 Subject: [PATCH 38/57] DEPR: Add warning for True for dropna of SeriesGroupBy.nth (#17493) --- doc/source/whatsnew/v0.21.0.txt | 2 ++ pandas/core/groupby.py | 21 +++++++++++++++------ pandas/tests/groupby/test_nth.py | 10 ++++++++-- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index eccd71f45ec27..33232d2b09416 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -341,6 +341,8 @@ Deprecations - ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). +- :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). + .. _whatsnew_0210.prior_deprecations: Removal of prior version deprecations/changes diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 248f3b2095a78..f14ed08a27fae 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1393,12 +1393,21 @@ def nth(self, n, dropna=None): return out.sort_index() if self.sort else out - if isinstance(self._selected_obj, DataFrame) and \ - dropna not in ['any', 'all']: - # Note: when agg-ing picker doesn't raise this, just returns NaN - raise ValueError("For a DataFrame groupby, dropna must be " - "either None, 'any' or 'all', " - "(was passed %s)." % (dropna),) + if dropna not in ['any', 'all']: + if isinstance(self._selected_obj, Series) and dropna is True: + warnings.warn("the dropna='%s' keyword is deprecated," + "use dropna='all' instead. " + "For a Series groupby, dropna must be " + "either None, 'any' or 'all'." % (dropna), + FutureWarning, + stacklevel=2) + dropna = 'all' + else: + # Note: when agg-ing picker doesn't raise this, + # just returns NaN + raise ValueError("For a DataFrame groupby, dropna must be " + "either None, 'any' or 'all', " + "(was passed %s)." % (dropna),) # old behaviour, but with all and any support for DataFrames. 
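
For the deprecation above, dropna='all' is the spelling that replaces dropna=True on a SeriesGroupBy. A short sketch of the documented behaviour, mirroring the doc example used in the tests below (0.21-era semantics, where nth returns one row per group key):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
    g = df.groupby('A')

    # first non-null B per group; equivalent to g.B.first() here
    result = g.B.nth(0, dropna='all')
    pd.testing.assert_series_equal(result, g.B.first())
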
# modified in GH 7559 to have better perf diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 28392537be3c6..ffbede0eb208f 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -2,7 +2,10 @@ import pandas as pd from pandas import DataFrame, MultiIndex, Index, Series, isna from pandas.compat import lrange -from pandas.util.testing import assert_frame_equal, assert_series_equal +from pandas.util.testing import ( + assert_frame_equal, + assert_produces_warning, + assert_series_equal) from .common import MixIn @@ -171,7 +174,10 @@ def test_nth(self): # doc example df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) g = df.groupby('A') - result = g.B.nth(0, dropna=True) + # PR 17493, related to issue 11038 + # test Series.nth with True for dropna produces DeprecationWarning + with assert_produces_warning(FutureWarning): + result = g.B.nth(0, dropna=True) expected = g.B.first() assert_series_equal(result, expected) From 83436af8ae1ccad49b7ceac7471c060d823d10ab Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 12 Sep 2017 08:54:53 -0400 Subject: [PATCH 39/57] COMPAT: Iteration should always yield a python scalar (#17491) xref #10904 closes #13236 closes #13256 xref #14216 --- doc/source/whatsnew/v0.21.0.txt | 47 ++++++++++++++++ pandas/core/base.py | 25 ++++++++- pandas/core/categorical.py | 6 ++ pandas/core/indexes/base.py | 9 --- pandas/core/indexes/category.py | 4 ++ pandas/core/series.py | 13 ----- pandas/core/sparse/array.py | 12 +++- pandas/tests/frame/test_api.py | 11 ++-- pandas/tests/frame/test_convert_to.py | 13 +++++ pandas/tests/series/test_io.py | 36 +----------- pandas/tests/test_base.py | 79 +++++++++++++++++++++++++-- 11 files changed, 187 insertions(+), 68 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 33232d2b09416..89da897f6c529 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -188,6 +188,53 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in ... ValueError: Cannot operate inplace if there is no assignment +.. _whatsnew_0210.api_breaking.iteration_scalars: + +Iteration of Series/Index will now return python scalars +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, when using certain iteration methods for a ``Series`` with dtype ``int`` or ``float``, you would receive a ``numpy`` scalar, e.g. a ``np.int64``, rather than a python ``int``. Issue (:issue:`10904`) corrected this for ``Series.tolist()`` and ``list(Series)``. This change makes all iteration methods consistent, in particular, for ``__iter__()`` and ``.map()``; note that this only affect int/float dtypes. (:issue:`13236`, :issue:`13258`, :issue:`14216`). + +.. ipython:: python + + s = Series([1, 2, 3]) + s + +Previously: + +.. code-block:: python + + In [2]: type(list(s)[0]) + Out[2]: numpy.int64 + +New Behaviour: + +.. ipython:: python + + type(list(s)[0]) + +Furthermore this will now correctly box the results of iteration for :func:`DataFrame.to_dict` as well. + +.. ipython:: python + + d = {'a':[1], 'b':['b']} + df = DataFrame(d) + +Previously: + +.. code-block:: python + + In [8]: type(df.to_dict()['a'][0]) + Out[8]: numpy.int64 + +New Behaviour: + +.. ipython:: python + + type(df.to_dict()['a'][0]) + +.. 
_whatsnew_0210.api_breaking.dtype_conversions: + Dtype Conversions ^^^^^^^^^^^^^^^^^ diff --git a/pandas/core/base.py b/pandas/core/base.py index d60a8515dc920..62d89eac4b354 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -8,7 +8,12 @@ from pandas.core.dtypes.missing import isna from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass -from pandas.core.dtypes.common import is_object_dtype, is_list_like, is_scalar +from pandas.core.dtypes.common import ( + is_object_dtype, + is_list_like, + is_scalar, + is_datetimelike) + from pandas.util._validators import validate_bool_kwarg from pandas.core import common as com @@ -18,7 +23,8 @@ from pandas.compat import PYPY from pandas.util._decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) -from pandas.core.common import AbstractMethodError +from pandas.core.common import AbstractMethodError, _maybe_box_datetimelike + from pandas.core.accessor import DirNamesMixin _shared_docs = dict() @@ -884,6 +890,21 @@ def argmin(self, axis=None): """ return nanops.nanargmin(self.values) + def tolist(self): + """ + return a list of the values; box to scalars + """ + return list(self.__iter__()) + + def __iter__(self): + """ + provide iteration over the values; box to scalars + """ + if is_datetimelike(self): + return (_maybe_box_datetimelike(x) for x in self._values) + else: + return iter(self._values.tolist()) + @cache_readonly def hasnans(self): """ return if I have any nans; enables various perf speedups """ diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 1c2a29333001c..dbd2a79b7e46d 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -399,6 +399,12 @@ def itemsize(self): """ return the size of a single category """ return self.categories.itemsize + def tolist(self): + """ + return a list of my values + """ + return np.array(self).tolist() + def reshape(self, new_shape, *args, **kwargs): """ .. 
deprecated:: 0.19.0 diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ef5f68936044a..008828cf4f309 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -585,12 +585,6 @@ def memory_usage(self, deep=False): return result # ops compat - def tolist(self): - """ - return a list of the Index values - """ - return list(self.values) - @deprecate_kwarg(old_arg_name='n', new_arg_name='repeats') def repeat(self, repeats, *args, **kwargs): """ @@ -1601,9 +1595,6 @@ def is_all_dates(self): return False return is_datetime_array(_ensure_object(self.values)) - def __iter__(self): - return iter(self.values) - def __reduce__(self): d = dict(data=self._data) d.update(self._get_attributes_dict()) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 0681202289311..c8044b14e4e57 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -253,6 +253,10 @@ def get_values(self): """ return the underlying data as an ndarray """ return self._data.get_values() + def __iter__(self): + """ iterate like Categorical """ + return self._data.__iter__() + @property def codes(self): return self._data.codes diff --git a/pandas/core/series.py b/pandas/core/series.py index 6905fc1aced74..ac11c5f908fdc 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -19,7 +19,6 @@ is_integer, is_integer_dtype, is_float_dtype, is_extension_type, is_datetimetz, - is_datetimelike, is_datetime64tz_dtype, is_timedelta64_dtype, is_list_like, @@ -1095,14 +1094,6 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, with open(buf, 'w') as f: f.write(result) - def __iter__(self): - """ provide iteration over the values of the Series - box values if necessary """ - if is_datetimelike(self): - return (_maybe_box_datetimelike(x) for x in self._values) - else: - return iter(self._values) - def iteritems(self): """ Lazily iterate over (index, value) tuples @@ -1118,10 +1109,6 @@ def keys(self): """Alias for index""" return self.index - def tolist(self): - """ Convert Series to a nested list """ - return list(self.asobject) - def to_dict(self, into=dict): """ Convert Series to {label -> value} dict or dict-like object. 
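
The net effect of the Series/Index changes in this patch is that iteration and tolist now share a base-class path that boxes numpy scalars to Python scalars and datetime-likes to Timestamp/Timedelta. A quick check of that contract (on Python 3; Python 2 may yield long for large ints):

    import pandas as pd

    s = pd.Series([1, 2, 3])
    assert all(isinstance(x, int) for x in s)       # not np.int64
    assert isinstance(s.tolist()[0], int)

    t = pd.Series(pd.date_range('2017-01-01', periods=2))
    assert isinstance(next(iter(t)), pd.Timestamp)  # boxed datetime-like
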
diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 2f830a98db649..f965c91999a03 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -407,8 +407,18 @@ def to_dense(self, fill=None): return self.values def __iter__(self): + if np.issubdtype(self.dtype, np.floating): + boxer = float + elif np.issubdtype(self.dtype, np.integer): + boxer = int + else: + boxer = lambda x: x + for i in range(len(self)): - yield self._get_val_at(i) + r = self._get_val_at(i) + + # box em + yield boxer(r) def __getitem__(self, key): """ diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index a62fcb506a34b..b3209da6449d6 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -9,7 +9,7 @@ import sys from distutils.version import LooseVersion -from pandas.compat import range, lrange +from pandas.compat import range, lrange, long from pandas import compat from numpy.random import randn @@ -205,15 +205,18 @@ def test_itertuples(self): 'ints': lrange(5)}, columns=['floats', 'ints']) for tup in df.itertuples(index=False): - assert isinstance(tup[1], np.integer) + assert isinstance(tup[1], (int, long)) df = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]}) dfaa = df[['a', 'a']] assert (list(dfaa.itertuples()) == [(0, 1, 1), (1, 2, 2), (2, 3, 3)]) - assert (repr(list(df.itertuples(name=None))) == - '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]') + + # repr with be int/long on windows + if not compat.is_platform_windows(): + assert (repr(list(df.itertuples(name=None))) == + '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]') tup = next(df.itertuples(name='TestName')) diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 629c695b702fe..99e5630ce6a43 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -5,6 +5,7 @@ import numpy as np from pandas import compat +from pandas.compat import long from pandas import (DataFrame, Series, MultiIndex, Timestamp, date_range) @@ -236,3 +237,15 @@ def test_to_records_datetimeindex_with_tz(self, tz): # both converted to UTC, so they are equal tm.assert_numpy_array_equal(result, expected) + + def test_to_dict_box_scalars(self): + # 14216 + # make sure that we are boxing properly + d = {'a': [1], 'b': ['b']} + + result = DataFrame(d).to_dict() + assert isinstance(list(result['a'])[0], (int, long)) + assert isinstance(list(result['b'])[0], (int, long)) + + result = DataFrame(d).to_dict(orient='records') + assert isinstance(result[0]['a'], (int, long)) diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index 503185de427f1..5b7fd1ec94a90 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -10,7 +10,7 @@ from pandas import Series, DataFrame -from pandas.compat import StringIO, u, long +from pandas.compat import StringIO, u from pandas.util.testing import (assert_series_equal, assert_almost_equal, assert_frame_equal, ensure_clean) import pandas.util.testing as tm @@ -178,37 +178,3 @@ def test_to_dict(self, mapping): from_method = Series(ts.to_dict(collections.Counter)) from_constructor = Series(collections.Counter(ts.iteritems())) tm.assert_series_equal(from_method, from_constructor) - - -class TestSeriesToList(TestData): - - def test_tolist(self): - rs = self.ts.tolist() - xp = self.ts.values.tolist() - assert_almost_equal(rs, xp) - - # datetime64 - s = Series(self.ts.index) - rs = s.tolist() - assert self.ts.index[0] == rs[0] - - def test_tolist_np_int(self): - # GH10904 - 
for t in ['int8', 'int16', 'int32', 'int64']: - s = pd.Series([1], dtype=t) - assert isinstance(s.tolist()[0], (int, long)) - - def test_tolist_np_uint(self): - # GH10904 - for t in ['uint8', 'uint16']: - s = pd.Series([1], dtype=t) - assert isinstance(s.tolist()[0], int) - for t in ['uint32', 'uint64']: - s = pd.Series([1], dtype=t) - assert isinstance(s.tolist()[0], long) - - def test_tolist_np_float(self): - # GH10904 - for t in ['float16', 'float32', 'float64']: - s = pd.Series([1], dtype=t) - assert isinstance(s.tolist()[0], float) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 9e92c7cf1a9b8..210d0260b8d95 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -13,9 +13,10 @@ is_object_dtype, is_datetimetz, needs_i8_conversion) import pandas.util.testing as tm -from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex, - Timedelta, IntervalIndex, Interval) -from pandas.compat import StringIO, PYPY +from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, + PeriodIndex, Timedelta, IntervalIndex, Interval, + CategoricalIndex, Timestamp) +from pandas.compat import StringIO, PYPY, long from pandas.compat.numpy import np_array_datetime64_compat from pandas.core.base import PandasDelegate, NoNewAttributesMixin from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin @@ -433,7 +434,7 @@ def test_value_counts_unique_nunique(self): # datetimetz Series returns array of Timestamp assert result[0] == orig[0] for r in result: - assert isinstance(r, pd.Timestamp) + assert isinstance(r, Timestamp) tm.assert_numpy_array_equal(result, orig._values.asobject.values) else: @@ -1031,3 +1032,73 @@ def f(): pytest.raises(AttributeError, f) assert not hasattr(t, "b") + + +class TestToIterable(object): + # test that we convert an iterable to python types + + dtypes = [ + ('int8', (int, long)), + ('int16', (int, long)), + ('int32', (int, long)), + ('int64', (int, long)), + ('uint8', (int, long)), + ('uint16', (int, long)), + ('uint32', (int, long)), + ('uint64', (int, long)), + ('float16', float), + ('float32', float), + ('float64', float), + ('datetime64[ns]', Timestamp), + ('datetime64[ns, US/Eastern]', Timestamp), + ('timedelta64[ns]', Timedelta)] + + @pytest.mark.parametrize( + 'dtype, rdtype', + dtypes + [ + ('object', object), + ('category', object)]) + @pytest.mark.parametrize( + 'method', + [ + lambda x: x.tolist(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], ids=['tolist', 'list', 'iter']) + @pytest.mark.parametrize('typ', [Series, Index]) + def test_iterable(self, typ, method, dtype, rdtype): + # gh-10904 + # gh-13258 + # coerce iteration to underlying python / pandas types + s = typ([1], dtype=dtype) + result = method(s)[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize( + 'dtype, rdtype', + dtypes + [ + ('object', (int, long)), + ('category', (int, long))]) + @pytest.mark.parametrize('typ', [Series, Index]) + def test_iterable_map(self, typ, dtype, rdtype): + # gh-13236 + # coerce iteration to underlying python / pandas types + s = typ([1], dtype=dtype) + result = s.map(type)[0] + if not isinstance(rdtype, tuple): + rdtype = tuple([rdtype]) + assert result in rdtype + + @pytest.mark.parametrize( + 'method', + [ + lambda x: x.tolist(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], ids=['tolist', 'list', 'iter']) + def test_categorial_datetimelike(self, method): + i = CategoricalIndex([Timestamp('1999-12-31'), + Timestamp('2000-12-31')]) + + result = method(i)[0] + assert 
isinstance(result, Timestamp) From 633be31adcd43fc8bfe9a9fd9e7621ff3fc8ccbd Mon Sep 17 00:00:00 2001 From: Giftlin <31629119+Giftlin@users.noreply.github.com> Date: Wed, 13 Sep 2017 15:33:30 +0530 Subject: [PATCH 40/57] DOC: grammatical mistake (#17511) --- pandas/plotting/_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_tools.py b/pandas/plotting/_tools.py index 389e238ccb96e..6deddc97915f1 100644 --- a/pandas/plotting/_tools.py +++ b/pandas/plotting/_tools.py @@ -141,7 +141,7 @@ def _subplots(naxes=None, sharex=False, sharey=False, squeeze=True, array of Axis objects are returned as numpy 1-d arrays. - for NxM subplots with N>1 and M>1 are returned as a 2d array. - If False, no squeezing at all is done: the returned axis object is always + If False, no squeezing is done: the returned axis object is always a 2-d array containing Axis instances, even if it ends up being 1x1. subplot_kw : dict From f6d4d7078d49503adf990f0c159eb603ca1f0c1a Mon Sep 17 00:00:00 2001 From: topper-123 Date: Wed, 13 Sep 2017 11:04:32 +0100 Subject: [PATCH 41/57] removed versionadded <0.17 (#17504) --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/frame.py | 6 ------ pandas/core/generic.py | 6 ------ pandas/core/indexes/category.py | 2 -- pandas/core/indexes/datetimes.py | 2 +- pandas/core/reshape/reshape.py | 2 -- pandas/core/sparse/series.py | 4 ---- pandas/core/strings.py | 5 ----- 8 files changed, 2 insertions(+), 27 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 89da897f6c529..6ffa903c74150 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -526,4 +526,4 @@ Other ^^^^^ - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) - Several ``NaT`` method docstrings (e.g. :func:`NaT.ctime`) were incorrect (:issue:`17327`) -- The documentation has had references to versions < v0.16 removed and cleaned up (:issue:`17442`, :issue:`17442` & :issue:`#17404`) +- The documentation has had references to versions < v0.17 removed and cleaned up (:issue:`17442`, :issue:`17442`, :issue:`17404` & :issue:`17504`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5991ec825c841..dd5d490ea66a8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1479,8 +1479,6 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, Character recognized as decimal separator. E.g. use ',' for European data - .. versionadded:: 0.16.0 - """ formatter = fmt.CSVFormatter(self, path_or_buf, line_terminator=line_terminator, sep=sep, @@ -2165,8 +2163,6 @@ def _getitem_frame(self, key): def query(self, expr, inplace=False, **kwargs): """Query the columns of a frame with a boolean expression. - .. versionadded:: 0.13 - Parameters ---------- expr : string @@ -2561,8 +2557,6 @@ def assign(self, **kwargs): Assign new columns to a DataFrame, returning a new object (a copy) with all the original columns in addition to the new ones. - .. versionadded:: 0.16.0 - Parameters ---------- kwargs : keyword, value pairs diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8d16b079ba2c8..a71bf7be1bc75 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2348,8 +2348,6 @@ def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'): errors : {'ignore', 'raise'}, default 'raise' If 'ignore', suppress error and existing labels are dropped. - .. 
versionadded:: 0.16.1 - Returns ------- dropped : type of caller @@ -3070,8 +3068,6 @@ def sample(self, n=None, frac=None, replace=False, weights=None, """ Returns a random sample of items from an axis of object. - .. versionadded:: 0.16.1 - Parameters ---------- n : int, optional @@ -3228,8 +3224,6 @@ def sample(self, n=None, frac=None, replace=False, weights=None, _shared_docs['pipe'] = (""" Apply func(self, \*args, \*\*kwargs) - .. versionadded:: 0.16.2 - Parameters ---------- func : function diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index c8044b14e4e57..baa3ebce6abbc 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -33,8 +33,6 @@ class CategoricalIndex(Index, base.PandasDelegate): Immutable Index implementing an ordered, sliceable set. CategoricalIndex represents a sparsely populated Index with an underlying Categorical. - .. versionadded:: 0.16.1 - Parameters ---------- data : array-like or Categorical, (1-dimensional) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 5a04c550f4502..4cfb7547e7d0a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1577,7 +1577,7 @@ def _set_freq(self, value): days_in_month = _field_accessor( 'days_in_month', 'dim', - "The number of days in the month\n\n.. versionadded:: 0.16.0") + "The number of days in the month") daysinmonth = days_in_month is_month_start = _field_accessor( 'is_month_start', diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index b4abba8026b35..7260bc9a8b7a1 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -1110,8 +1110,6 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, Whether the dummy columns should be sparse or not. Returns SparseDataFrame if `data` is a Series or if all columns are included. Otherwise returns a DataFrame with some SparseBlocks. - - .. versionadded:: 0.16.1 drop_first : bool, default False Whether to get k-1 dummies out of k categorical levels by removing the first level. diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 99aec2dd11569..2aecb9d7c4ffb 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -732,8 +732,6 @@ def to_coo(self, row_levels=(0, ), column_levels=(1, ), sort_labels=False): (labels) or numbers of the levels. {row_levels, column_levels} must be a partition of the MultiIndex level names (or numbers). - .. versionadded:: 0.16.0 - Parameters ---------- row_levels : tuple/list @@ -784,8 +782,6 @@ def from_coo(cls, A, dense_index=False): """ Create a SparseSeries from a scipy.sparse.coo_matrix. - .. versionadded:: 0.16.0 - Parameters ---------- A : scipy.sparse.coo_matrix diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 48bc2ee05dd68..021f88d1aec00 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -602,8 +602,6 @@ def str_extract(arr, pat, flags=0, expand=None): For each subject string in the Series, extract groups from the first match of regular expression pat. - .. versionadded:: 0.13.0 - Parameters ---------- pat : string @@ -1016,7 +1014,6 @@ def str_split(arr, pat=None, n=None): * If True, return DataFrame/MultiIndex expanding dimensionality. * If False, return Series/Index. - .. versionadded:: 0.16.1 return_type : deprecated, use `expand` Returns @@ -1047,8 +1044,6 @@ def str_rsplit(arr, pat=None, n=None): string, starting at the end of the string and working to the front. 
Equivalent to :meth:`str.rsplit`. - .. versionadded:: 0.16.2 - Parameters ---------- pat : string, default None From f11bbf2f505d81900cc83ce387a6a1b1d2a2f866 Mon Sep 17 00:00:00 2001 From: Giftlin <31629119+Giftlin@users.noreply.github.com> Date: Wed, 13 Sep 2017 17:54:57 +0530 Subject: [PATCH 42/57] DOC: grammatical mistakes (#17512) --- pandas/io/stata.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 253ed03c25db9..92f180506a8b7 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -57,7 +57,7 @@ identifier of column that should be used as index of the DataFrame convert_missing : boolean, defaults to False Flag indicating whether to convert missing values to their Stata - representations. If False, missing values are replaced with nans. + representations. If False, missing values are replaced with nan. If True, columns containing missing values are returned with object data types and missing values are represented by StataMissingValue objects. @@ -248,8 +248,9 @@ def _stata_elapsed_date_to_datetime_vec(dates, fmt): def convert_year_month_safe(year, month): """ Convert year and month to datetimes, using pandas vectorized versions - when the date range falls within the range supported by pandas. Other - wise it falls back to a slower but more robust method using datetime. + when the date range falls within the range supported by pandas. + Otherwise it falls back to a slower but more robust method + using datetime. """ if year.max() < MAX_YEAR and year.min() > MIN_YEAR: return to_datetime(100 * year + month, format='%Y%m') From eef810ef2c64be00943696b33e8bab0b4dd66e9e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 13 Sep 2017 19:18:56 -0400 Subject: [PATCH 43/57] COMPAT: followup to #17491 (#17503) --- doc/source/whatsnew/v0.21.0.txt | 14 ++--- pandas/core/base.py | 27 ++++++--- pandas/core/categorical.py | 10 +++- pandas/core/indexes/category.py | 5 +- pandas/tests/indexes/test_category.py | 13 +++-- pandas/tests/series/test_api.py | 37 ------------- pandas/tests/test_base.py | 79 +++++++++++++++++++++++++-- 7 files changed, 119 insertions(+), 66 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 6ffa903c74150..9da1f321ef574 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -190,19 +190,19 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in .. _whatsnew_0210.api_breaking.iteration_scalars: -Iteration of Series/Index will now return python scalars +Iteration of Series/Index will now return Python scalars ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Previously, when using certain iteration methods for a ``Series`` with dtype ``int`` or ``float``, you would receive a ``numpy`` scalar, e.g. a ``np.int64``, rather than a python ``int``. Issue (:issue:`10904`) corrected this for ``Series.tolist()`` and ``list(Series)``. This change makes all iteration methods consistent, in particular, for ``__iter__()`` and ``.map()``; note that this only affect int/float dtypes. (:issue:`13236`, :issue:`13258`, :issue:`14216`). +Previously, when using certain iteration methods for a ``Series`` with dtype ``int`` or ``float``, you would receive a ``numpy`` scalar, e.g. a ``np.int64``, rather than a Python ``int``. Issue (:issue:`10904`) corrected this for ``Series.tolist()`` and ``list(Series)``. 
This change makes all iteration methods consistent, in particular, for ``__iter__()`` and ``.map()``; note that this only affects int/float dtypes. (:issue:`13236`, :issue:`13258`, :issue:`14216`). .. ipython:: python - s = Series([1, 2, 3]) + s = pd.Series([1, 2, 3]) s Previously: -.. code-block:: python +.. code-block:: ipython In [2]: type(list(s)[0]) Out[2]: numpy.int64 @@ -215,14 +215,14 @@ New Behaviour: Furthermore this will now correctly box the results of iteration for :func:`DataFrame.to_dict` as well. -.. ipython:: python +.. ipython:: ipython d = {'a':[1], 'b':['b']} - df = DataFrame(d) + df = pd,DataFrame(d) Previously: -.. code-block:: python +.. code-block:: ipython In [8]: type(df.to_dict()['a'][0]) Out[8]: numpy.int64 diff --git a/pandas/core/base.py b/pandas/core/base.py index 62d89eac4b354..f0e8d8a16661b 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -892,18 +892,31 @@ def argmin(self, axis=None): def tolist(self): """ - return a list of the values; box to scalars + Return a list of the values. + + These are each a scalar type, which is a Python scalar + (for str, int, float) or a pandas scalar + (for Timestamp/Timedelta/Interval/Period) + + See Also + -------- + numpy.tolist """ - return list(self.__iter__()) + + if is_datetimelike(self): + return [_maybe_box_datetimelike(x) for x in self._values] + else: + return self._values.tolist() def __iter__(self): """ - provide iteration over the values; box to scalars + Return an iterator of the values. + + These are each a scalar type, which is a Python scalar + (for str, int, float) or a pandas scalar + (for Timestamp/Timedelta/Interval/Period) """ - if is_datetimelike(self): - return (_maybe_box_datetimelike(x) for x in self._values) - else: - return iter(self._values.tolist()) + return iter(self.tolist()) @cache_readonly def hasnans(self): diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index dbd2a79b7e46d..97df72900428c 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -26,7 +26,7 @@ is_integer_dtype, is_bool, is_list_like, is_sequence, is_scalar) -from pandas.core.common import is_null_slice +from pandas.core.common import is_null_slice, _maybe_box_datetimelike from pandas.core.algorithms import factorize, take_1d, unique1d from pandas.core.base import (PandasObject, PandasDelegate, @@ -401,8 +401,14 @@ def itemsize(self): def tolist(self): """ - return a list of my values + Return a list of the values. 
+ + These are each a scalar type, which is a Python scalar + (for str, int, float) or a pandas scalar + (for Timestamp/Timedelta/Interval/Period) """ + if is_datetimelike(self.categories): + return [_maybe_box_datetimelike(x) for x in self] return np.array(self).tolist() def reshape(self, new_shape, *args, **kwargs): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index baa3ebce6abbc..71cd4790ac364 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -251,9 +251,8 @@ def get_values(self): """ return the underlying data as an ndarray """ return self._data.get_values() - def __iter__(self): - """ iterate like Categorical """ - return self._data.__iter__() + def tolist(self): + return self._data.tolist() @property def codes(self): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 05d31af57b36c..aac68ebd6abed 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -576,12 +576,13 @@ def test_isin(self): ci.isin(['c', 'a', 'b', np.nan]), np.array([True] * 6)) # mismatched categorical -> coerced to ndarray so doesn't matter - tm.assert_numpy_array_equal( - ci.isin(ci.set_categories(list('abcdefghi'))), np.array([True] * - 6)) - tm.assert_numpy_array_equal( - ci.isin(ci.set_categories(list('defghi'))), - np.array([False] * 5 + [True])) + result = ci.isin(ci.set_categories(list('abcdefghi'))) + expected = np.array([True] * 6) + tm.assert_numpy_array_equal(result, expected) + + result = ci.isin(ci.set_categories(list('defghi'))) + expected = np.array([False] * 5 + [True]) + tm.assert_numpy_array_equal(result, expected) def test_identical(self): diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index b7fbe803f8d3b..d0805e2bb54d2 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -245,43 +245,6 @@ def test_iter(self): for i, val in enumerate(self.ts): assert val == self.ts[i] - def test_iter_box(self): - vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')] - s = pd.Series(vals) - assert s.dtype == 'datetime64[ns]' - for res, exp in zip(s, vals): - assert isinstance(res, pd.Timestamp) - assert res.tz is None - assert res == exp - - vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'), - pd.Timestamp('2011-01-02', tz='US/Eastern')] - s = pd.Series(vals) - - assert s.dtype == 'datetime64[ns, US/Eastern]' - for res, exp in zip(s, vals): - assert isinstance(res, pd.Timestamp) - assert res.tz == exp.tz - assert res == exp - - # timedelta - vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')] - s = pd.Series(vals) - assert s.dtype == 'timedelta64[ns]' - for res, exp in zip(s, vals): - assert isinstance(res, pd.Timedelta) - assert res == exp - - # period (object dtype, not boxed) - vals = [pd.Period('2011-01-01', freq='M'), - pd.Period('2011-01-02', freq='M')] - s = pd.Series(vals) - assert s.dtype == 'object' - for res, exp in zip(s, vals): - assert isinstance(res, pd.Period) - assert res.freq == 'M' - assert res == exp - def test_keys(self): # HACK: By doing this in two stages, we avoid 2to3 wrapping the call # to .keys() in a list() diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 210d0260b8d95..38d78b12b31aa 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1054,10 +1054,7 @@ class TestToIterable(object): ('timedelta64[ns]', Timedelta)] @pytest.mark.parametrize( - 'dtype, rdtype', - dtypes + [ - ('object', object), - ('category', 
object)]) + 'dtype, rdtype', dtypes) @pytest.mark.parametrize( 'method', [ @@ -1074,6 +1071,43 @@ def test_iterable(self, typ, method, dtype, rdtype): result = method(s)[0] assert isinstance(result, rdtype) + @pytest.mark.parametrize( + 'dtype, rdtype, obj', + [ + ('object', object, 'a'), + ('object', (int, long), 1), + ('category', object, 'a'), + ('category', (int, long), 1)]) + @pytest.mark.parametrize( + 'method', + [ + lambda x: x.tolist(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], ids=['tolist', 'list', 'iter']) + @pytest.mark.parametrize('typ', [Series, Index]) + def test_iterable_object_and_category(self, typ, method, + dtype, rdtype, obj): + # gh-10904 + # gh-13258 + # coerce iteration to underlying python / pandas types + s = typ([obj], dtype=dtype) + result = method(s)[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize( + 'dtype, rdtype', dtypes) + def test_iterable_items(self, dtype, rdtype): + # gh-13258 + # test items / iteritems yields the correct boxed scalars + # this only applies to series + s = Series([1], dtype=dtype) + _, result = list(s.items())[0] + assert isinstance(result, rdtype) + + _, result = list(s.iteritems())[0] + assert isinstance(result, rdtype) + @pytest.mark.parametrize( 'dtype, rdtype', dtypes + [ @@ -1102,3 +1136,40 @@ def test_categorial_datetimelike(self, method): result = method(i)[0] assert isinstance(result, Timestamp) + + def test_iter_box(self): + vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')] + s = pd.Series(vals) + assert s.dtype == 'datetime64[ns]' + for res, exp in zip(s, vals): + assert isinstance(res, pd.Timestamp) + assert res.tz is None + assert res == exp + + vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'), + pd.Timestamp('2011-01-02', tz='US/Eastern')] + s = pd.Series(vals) + + assert s.dtype == 'datetime64[ns, US/Eastern]' + for res, exp in zip(s, vals): + assert isinstance(res, pd.Timestamp) + assert res.tz == exp.tz + assert res == exp + + # timedelta + vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')] + s = pd.Series(vals) + assert s.dtype == 'timedelta64[ns]' + for res, exp in zip(s, vals): + assert isinstance(res, pd.Timedelta) + assert res == exp + + # period (object dtype, not boxed) + vals = [pd.Period('2011-01-01', freq='M'), + pd.Period('2011-01-02', freq='M')] + s = pd.Series(vals) + assert s.dtype == 'object' + for res, exp in zip(s, vals): + assert isinstance(res, pd.Period) + assert res.freq == 'M' + assert res == exp From fa557f7391589f351b1260f46b3b3db22492f50b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Sep 2017 16:20:53 -0700 Subject: [PATCH 44/57] De-privatize timezone funcs (#17502) --- pandas/_libs/index.pyx | 6 +- pandas/_libs/period.pyx | 14 ++--- pandas/_libs/src/inference.pyx | 6 +- pandas/_libs/tslib.pyx | 96 +++++++++++++++---------------- pandas/_libs/tslibs/timezones.pxd | 12 ++-- pandas/_libs/tslibs/timezones.pyx | 20 +++---- 6 files changed, 75 insertions(+), 79 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index bf4d53683c9b7..884117799ec5b 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -17,7 +17,7 @@ cimport tslib from hashtable cimport HashTable -from tslibs.timezones cimport _is_utc +from tslibs.timezones cimport is_utc, get_utcoffset from pandas._libs import tslib, algos, hashtable as _hash from pandas._libs.tslib import Timestamp, Timedelta from datetime import datetime, timedelta @@ -551,8 +551,8 @@ cdef inline _to_i8(object val): tzinfo = getattr(val, 'tzinfo', None) # Save 
the original date value so we can get the utcoffset from it. ival = _pydatetime_to_dts(val, &dts) - if tzinfo is not None and not _is_utc(tzinfo): - offset = tslib._get_utcoffset(tzinfo, val) + if tzinfo is not None and not is_utc(tzinfo): + offset = get_utcoffset(tzinfo, val) ival -= tslib._delta_to_nanoseconds(offset) return ival return val diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 2b0734f5cf2e7..9e473a7f362b4 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -34,7 +34,7 @@ from lib cimport is_null_datetimelike, is_period from pandas._libs import tslib, lib from pandas._libs.tslib import (Timedelta, Timestamp, iNaT, NaT) -from tslibs.timezones cimport _is_utc, _is_tzlocal, _get_utcoffset +from tslibs.timezones cimport is_utc, is_tzlocal, get_utcoffset from tslib cimport ( maybe_get_tz, _get_dst_info, @@ -533,7 +533,7 @@ cdef _reso_local(ndarray[int64_t] stamps, object tz): ndarray[int64_t] trans, deltas, pos pandas_datetimestruct dts - if _is_utc(tz): + if is_utc(tz): for i in range(n): if stamps[i] == NPY_NAT: continue @@ -541,7 +541,7 @@ cdef _reso_local(ndarray[int64_t] stamps, object tz): curr_reso = _reso_stamp(&dts) if curr_reso < reso: reso = curr_reso - elif _is_tzlocal(tz): + elif is_tzlocal(tz): for i in range(n): if stamps[i] == NPY_NAT: continue @@ -549,7 +549,7 @@ cdef _reso_local(ndarray[int64_t] stamps, object tz): &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 + delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 pandas_datetime_to_datetimestruct(stamps[i] + delta, PANDAS_FR_ns, &dts) curr_reso = _reso_stamp(&dts) @@ -597,7 +597,7 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, ndarray[int64_t] trans, deltas, pos pandas_datetimestruct dts - if _is_utc(tz): + if is_utc(tz): for i in range(n): if stamps[i] == NPY_NAT: result[i] = NPY_NAT @@ -607,7 +607,7 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq) - elif _is_tzlocal(tz): + elif is_tzlocal(tz): for i in range(n): if stamps[i] == NPY_NAT: result[i] = NPY_NAT @@ -616,7 +616,7 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 + delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 pandas_datetime_to_datetimestruct(stamps[i] + delta, PANDAS_FR_ns, &dts) result[i] = get_period_ordinal(dts.year, dts.month, dts.day, diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index 95145ff49b02f..2bb362eab4097 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -3,7 +3,7 @@ from decimal import Decimal cimport util cimport cython from tslib import NaT -from tslibs.timezones cimport _get_zone +from tslibs.timezones cimport get_timezone from datetime import datetime, timedelta iNaT = util.get_nat() @@ -901,13 +901,13 @@ cpdef bint is_datetime_with_singletz_array(ndarray[object] values): for i in range(n): base_val = values[i] if base_val is not NaT: - base_tz = _get_zone(getattr(base_val, 'tzinfo', None)) + base_tz = get_timezone(getattr(base_val, 'tzinfo', None)) for j in range(i, n): val = values[j] if val is not NaT: tz = getattr(val, 'tzinfo', None) - if base_tz != tz and base_tz != _get_zone(tz): + if base_tz != tz 
and base_tz != get_timezone(tz): return False break diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index a8ae0fcd733d6..629325c28ea9c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -108,11 +108,11 @@ iNaT = NPY_NAT from tslibs.timezones cimport ( - _is_utc, _is_tzlocal, - _treat_tz_as_dateutil, _treat_tz_as_pytz, - _get_zone, - _get_utcoffset) -from tslibs.timezones import get_timezone, _get_utcoffset # noqa + is_utc, is_tzlocal, + treat_tz_as_dateutil, treat_tz_as_pytz, + get_timezone, + get_utcoffset) +from tslibs.timezones import get_timezone, get_utcoffset # noqa cdef inline object create_timestamp_from_ts( @@ -160,7 +160,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False): func_create = create_datetime_from_ts if tz is not None: - if _is_utc(tz): + if is_utc(tz): for i in range(n): value = arr[i] if value == NPY_NAT: @@ -169,7 +169,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False): pandas_datetime_to_datetimestruct( value, PANDAS_FR_ns, &dts) result[i] = func_create(value, dts, tz, freq) - elif _is_tzlocal(tz) or _is_fixed_offset(tz): + elif is_tzlocal(tz) or _is_fixed_offset(tz): for i in range(n): value = arr[i] if value == NPY_NAT: @@ -194,7 +194,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False): # Adjust datetime64 timestamp, recompute datetimestruct pos = trans.searchsorted(value, side='right') - 1 - if _treat_tz_as_pytz(tz): + if treat_tz_as_pytz(tz): # find right representation of dst etc in pytz timezone new_tz = tz._tzinfos[tz._transition_info[pos]] else: @@ -242,12 +242,12 @@ def ints_to_pytimedelta(ndarray[int64_t] arr, box=False): cdef inline bint _is_fixed_offset(object tz): - if _treat_tz_as_dateutil(tz): + if treat_tz_as_dateutil(tz): if len(tz._trans_idx) == 0 and len(tz._trans_list) == 0: return 1 else: return 0 - elif _treat_tz_as_pytz(tz): + elif treat_tz_as_pytz(tz): if (len(tz._transition_info) == 0 and len(tz._utc_transition_times) == 0): return 1 @@ -1107,12 +1107,12 @@ cdef class _Timestamp(datetime): try: stamp += self.strftime('%z') if self.tzinfo: - zone = _get_zone(self.tzinfo) + zone = get_timezone(self.tzinfo) except ValueError: year2000 = self.replace(year=2000) stamp += year2000.strftime('%z') if self.tzinfo: - zone = _get_zone(self.tzinfo) + zone = get_timezone(self.tzinfo) try: stamp += zone.strftime(' %%Z') @@ -1272,7 +1272,7 @@ cdef class _Timestamp(datetime): cdef: int64_t val val = self.value - if self.tz is not None and not _is_utc(self.tz): + if self.tz is not None and not is_utc(self.tz): val = tz_convert_single(self.value, 'UTC', self.tz) return val @@ -1510,14 +1510,14 @@ cdef convert_to_tsobject(object ts, object tz, object unit, except: pass obj.value = _pydatetime_to_dts(ts, &obj.dts) - ts_offset = _get_utcoffset(ts.tzinfo, ts) + ts_offset = get_utcoffset(ts.tzinfo, ts) obj.value -= _delta_to_nanoseconds(ts_offset) - tz_offset = _get_utcoffset(tz, ts) + tz_offset = get_utcoffset(tz, ts) obj.value += _delta_to_nanoseconds(tz_offset) pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns, &obj.dts) obj.tzinfo = tz - elif not _is_utc(tz): + elif not is_utc(tz): ts = _localize_pydatetime(ts, tz) obj.value = _pydatetime_to_dts(ts, &obj.dts) obj.tzinfo = ts.tzinfo @@ -1529,8 +1529,8 @@ cdef convert_to_tsobject(object ts, object tz, object unit, obj.value = _pydatetime_to_dts(ts, &obj.dts) obj.tzinfo = ts.tzinfo - if obj.tzinfo is not None and not _is_utc(obj.tzinfo): - offset = _get_utcoffset(obj.tzinfo, ts) + if 
obj.tzinfo is not None and not is_utc(obj.tzinfo): + offset = get_utcoffset(obj.tzinfo, ts) obj.value -= _delta_to_nanoseconds(offset) if is_timestamp(ts): @@ -1641,13 +1641,13 @@ cdef inline void _localize_tso(_TSObject obj, object tz): """ Take a TSObject in UTC and localizes to timezone tz. """ - if _is_utc(tz): + if is_utc(tz): obj.tzinfo = tz - elif _is_tzlocal(tz): + elif is_tzlocal(tz): pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns, &obj.dts) dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour, obj.dts.min, obj.dts.sec, obj.dts.us, tz) - delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 + delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 if obj.value != NPY_NAT: pandas_datetime_to_datetimestruct(obj.value + delta, PANDAS_FR_ns, &obj.dts) @@ -1671,7 +1671,7 @@ cdef inline void _localize_tso(_TSObject obj, object tz): pandas_datetime_to_datetimestruct( obj.value, PANDAS_FR_ns, &obj.dts) obj.tzinfo = tz - elif _treat_tz_as_pytz(tz): + elif treat_tz_as_pytz(tz): inf = tz._transition_info[pos] if obj.value != NPY_NAT: pandas_datetime_to_datetimestruct(obj.value + deltas[pos], @@ -1680,7 +1680,7 @@ cdef inline void _localize_tso(_TSObject obj, object tz): pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns, &obj.dts) obj.tzinfo = tz._tzinfos[inf] - elif _treat_tz_as_dateutil(tz): + elif treat_tz_as_dateutil(tz): if obj.value != NPY_NAT: pandas_datetime_to_datetimestruct(obj.value + deltas[pos], PANDAS_FR_ns, &obj.dts) @@ -1770,10 +1770,10 @@ def datetime_to_datetime64(ndarray[object] values): elif PyDateTime_Check(val): if val.tzinfo is not None: if inferred_tz is not None: - if _get_zone(val.tzinfo) != inferred_tz: + if get_timezone(val.tzinfo) != inferred_tz: raise ValueError('Array must be all same time zone') else: - inferred_tz = _get_zone(val.tzinfo) + inferred_tz = get_timezone(val.tzinfo) _ts = convert_to_tsobject(val, None, None, 0, 0) iresult[i] = _ts.value @@ -4088,9 +4088,9 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): return np.array([], dtype=np.int64) # Convert to UTC - if _get_zone(tz1) != 'UTC': + if get_timezone(tz1) != 'UTC': utc_dates = np.empty(n, dtype=np.int64) - if _is_tzlocal(tz1): + if is_tzlocal(tz1): for i in range(n): v = vals[i] if v == NPY_NAT: @@ -4099,7 +4099,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz1) - delta = (int(_get_utcoffset(tz1, dt).total_seconds()) + delta = (int(get_utcoffset(tz1, dt).total_seconds()) * 1000000000) utc_dates[i] = v - delta else: @@ -4126,11 +4126,11 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): else: utc_dates = vals - if _get_zone(tz2) == 'UTC': + if get_timezone(tz2) == 'UTC': return utc_dates result = np.zeros(n, dtype=np.int64) - if _is_tzlocal(tz2): + if is_tzlocal(tz2): for i in range(n): v = utc_dates[i] if v == NPY_NAT: @@ -4139,7 +4139,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz2) - delta = (int(_get_utcoffset(tz2, dt).total_seconds()) + delta = (int(get_utcoffset(tz2, dt).total_seconds()) * 1000000000) result[i] = v + delta return result @@ -4202,13 +4202,13 @@ def tz_convert_single(int64_t val, object tz1, object tz2): return val # Convert to UTC - if _is_tzlocal(tz1): + if is_tzlocal(tz1): 
pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz1) - delta = int(_get_utcoffset(tz1, dt).total_seconds()) * 1000000000 + delta = int(get_utcoffset(tz1, dt).total_seconds()) * 1000000000 utc_date = val - delta - elif _get_zone(tz1) != 'UTC': + elif get_timezone(tz1) != 'UTC': trans, deltas, typ = _get_dst_info(tz1) pos = trans.searchsorted(val, side='right') - 1 if pos < 0: @@ -4218,13 +4218,13 @@ def tz_convert_single(int64_t val, object tz1, object tz2): else: utc_date = val - if _get_zone(tz2) == 'UTC': + if get_timezone(tz2) == 'UTC': return utc_date - if _is_tzlocal(tz2): + if is_tzlocal(tz2): pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz2) - delta = int(_get_utcoffset(tz2, dt).total_seconds()) * 1000000000 + delta = int(get_utcoffset(tz2, dt).total_seconds()) * 1000000000 return utc_date + delta # Convert UTC to other timezone @@ -4289,13 +4289,13 @@ cdef object _get_dst_info(object tz): """ cache_key = _tz_cache_key(tz) if cache_key is None: - num = int(_get_utcoffset(tz, None).total_seconds()) * 1000000000 + num = int(get_utcoffset(tz, None).total_seconds()) * 1000000000 return (np.array([NPY_NAT + 1], dtype=np.int64), np.array([num], dtype=np.int64), None) if cache_key not in dst_cache: - if _treat_tz_as_pytz(tz): + if treat_tz_as_pytz(tz): trans = np.array(tz._utc_transition_times, dtype='M8[ns]') trans = trans.view('i8') try: @@ -4306,7 +4306,7 @@ cdef object _get_dst_info(object tz): deltas = _unbox_utcoffsets(tz._transition_info) typ = 'pytz' - elif _treat_tz_as_dateutil(tz): + elif treat_tz_as_dateutil(tz): if len(tz._trans_list): # get utc trans times trans_list = _get_utc_trans_times_from_dateutil_tz(tz) @@ -4336,7 +4336,7 @@ cdef object _get_dst_info(object tz): else: # static tzinfo trans = np.array([NPY_NAT + 1], dtype=np.int64) - num = int(_get_utcoffset(tz, None).total_seconds()) * 1000000000 + num = int(get_utcoffset(tz, None).total_seconds()) * 1000000000 deltas = np.array([num], dtype=np.int64) typ = 'static' @@ -4405,13 +4405,13 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, result = np.empty(n, dtype=np.int64) - if _is_tzlocal(tz): + if is_tzlocal(tz): for i in range(n): v = vals[i] pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 + delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 result[i] = v - delta return result @@ -5116,7 +5116,7 @@ cdef _normalize_local(ndarray[int64_t] stamps, object tz): ndarray[int64_t] trans, deltas, pos pandas_datetimestruct dts - if _is_utc(tz): + if is_utc(tz): with nogil: for i in range(n): if stamps[i] == NPY_NAT: @@ -5125,7 +5125,7 @@ cdef _normalize_local(ndarray[int64_t] stamps, object tz): pandas_datetime_to_datetimestruct( stamps[i], PANDAS_FR_ns, &dts) result[i] = _normalized_stamp(&dts) - elif _is_tzlocal(tz): + elif is_tzlocal(tz): for i in range(n): if stamps[i] == NPY_NAT: result[i] = NPY_NAT @@ -5133,7 +5133,7 @@ cdef _normalize_local(ndarray[int64_t] stamps, object tz): pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 + delta = int(get_utcoffset(tz, 
dt).total_seconds()) * 1000000000 pandas_datetime_to_datetimestruct(stamps[i] + delta, PANDAS_FR_ns, &dts) result[i] = _normalized_stamp(&dts) @@ -5180,12 +5180,12 @@ def dates_normalized(ndarray[int64_t] stamps, tz=None): Py_ssize_t i, n = len(stamps) pandas_datetimestruct dts - if tz is None or _is_utc(tz): + if tz is None or is_utc(tz): for i in range(n): pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) if (dts.hour + dts.min + dts.sec + dts.us) > 0: return False - elif _is_tzlocal(tz): + elif is_tzlocal(tz): for i in range(n): pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 897bd8af7e2de..ead5566440ca0 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- # cython: profile=False -cdef bint _is_utc(object tz) -cdef bint _is_tzlocal(object tz) +cdef bint is_utc(object tz) +cdef bint is_tzlocal(object tz) -cdef bint _treat_tz_as_pytz(object tz) -cdef bint _treat_tz_as_dateutil(object tz) +cdef bint treat_tz_as_pytz(object tz) +cdef bint treat_tz_as_dateutil(object tz) -cdef object _get_zone(object tz) +cpdef object get_timezone(object tz) -cpdef _get_utcoffset(tzinfo, obj) +cpdef get_utcoffset(tzinfo, obj) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 249eedef4bb09..3db369a09ba2d 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -10,24 +10,24 @@ import pytz UTC = pytz.utc -cdef inline bint _is_utc(object tz): +cdef inline bint is_utc(object tz): return tz is UTC or isinstance(tz, _dateutil_tzutc) -cdef inline bint _is_tzlocal(object tz): +cdef inline bint is_tzlocal(object tz): return isinstance(tz, _dateutil_tzlocal) -cdef inline bint _treat_tz_as_pytz(object tz): +cdef inline bint treat_tz_as_pytz(object tz): return hasattr(tz, '_utc_transition_times') and hasattr( tz, '_transition_info') -cdef inline bint _treat_tz_as_dateutil(object tz): +cdef inline bint treat_tz_as_dateutil(object tz): return hasattr(tz, '_trans_list') and hasattr(tz, '_trans_idx') -cdef inline object _get_zone(object tz): +cpdef inline object get_timezone(object tz): """ We need to do several things here: 1) Distinguish between pytz and dateutil timezones @@ -40,10 +40,10 @@ cdef inline object _get_zone(object tz): the tz name. It needs to be a string so that we can serialize it with UJSON/pytables. maybe_get_tz (below) is the inverse of this process. """ - if _is_utc(tz): + if is_utc(tz): return 'UTC' else: - if _treat_tz_as_dateutil(tz): + if treat_tz_as_dateutil(tz): if '.tar.gz' in tz._filename: raise ValueError( 'Bad tz filename. 
Dateutil on python 3 on windows has a ' @@ -64,14 +64,10 @@ cdef inline object _get_zone(object tz): except AttributeError: return tz - -def get_timezone(tz): - return _get_zone(tz) - #---------------------------------------------------------------------- # UTC Offsets -cpdef _get_utcoffset(tzinfo, obj): +cpdef get_utcoffset(tzinfo, obj): try: return tzinfo._utcoffset except AttributeError: From 2cf2566de98201454b10b749ac628d538f9695a9 Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 14 Sep 2017 04:11:30 -0600 Subject: [PATCH 45/57] Make *_range functions consistent (#17482) --- doc/source/api.rst | 9 + doc/source/timeseries.rst | 9 + doc/source/whatsnew/v0.21.0.txt | 55 +++- pandas/core/indexes/datetimes.py | 58 ++-- pandas/core/indexes/interval.py | 170 ++++++++--- pandas/core/indexes/period.py | 62 +++- pandas/core/indexes/timedeltas.py | 54 +++- .../indexes/datetimes/test_construction.py | 5 +- .../indexes/datetimes/test_date_range.py | 51 +++- .../tests/indexes/period/test_construction.py | 5 +- .../tests/indexes/period/test_period_range.py | 94 ++++++ pandas/tests/indexes/test_interval.py | 279 ++++++++++++++++-- .../indexes/timedeltas/test_construction.py | 5 +- .../timedeltas/test_timedelta_range.py | 21 +- 14 files changed, 747 insertions(+), 130 deletions(-) create mode 100644 pandas/tests/indexes/period/test_period_range.py diff --git a/doc/source/api.rst b/doc/source/api.rst index 27a4ab9cc6cbc..1541bbccefe21 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -218,10 +218,19 @@ Top-level dealing with datetimelike to_timedelta date_range bdate_range + cdate_range period_range timedelta_range infer_freq +Top-level dealing with intervals +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + + interval_range + Top-level evaluation ~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index c86c58c3183f6..5422d5c53043d 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1705,6 +1705,15 @@ has multiplied span. pd.PeriodIndex(start='2014-01', freq='3M', periods=4) +If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor +endpoints for a ``PeriodIndex`` with frequency matching that of the +``PeriodIndex`` constructor. + +.. ipython:: python + + pd.PeriodIndex(start=pd.Period('2017Q1', freq='Q'), + end=pd.Period('2017Q2', freq='Q'), freq='M') + Just like ``DatetimeIndex``, a ``PeriodIndex`` can also be used to index pandas objects: diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 9da1f321ef574..939199d3f6fa6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -218,7 +218,7 @@ Furthermore this will now correctly box the results of iteration for :func:`Data .. ipython:: ipython d = {'a':[1], 'b':['b']} - df = pd,DataFrame(d) + df = pd.DataFrame(d) Previously: @@ -358,6 +358,59 @@ Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ` Additionally, DataFrames with datetime columns that were parsed by :func:`read_sql_table` and :func:`read_sql_query` will also be localized to UTC only if the original SQL columns were timezone aware datetime columns. +.. 
_whatsnew_0210.api.consistency_of_range_functions:
+
+Consistency of Range Functions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In previous versions, there were some inconsistencies between the various range
+functions: :func:`date_range`, :func:`bdate_range`, :func:`cdate_range`,
+:func:`period_range`, :func:`timedelta_range`, and :func:`interval_range`.
+(:issue:`17471`).
+
+One of the inconsistent behaviors occurred when the ``start``, ``end``, and
+``periods`` parameters were all specified, potentially leading to ambiguous
+ranges. When all three parameters were passed, ``interval_range`` ignored the
+``periods`` parameter, ``period_range`` ignored the ``end`` parameter, and the
+other range functions raised. To promote consistency among the range functions,
+and avoid potentially ambiguous ranges, ``interval_range`` and ``period_range``
+will now raise when all three parameters are passed.
+
+Previous Behavior:
+
+.. code-block:: ipython
+
+    In [2]: pd.interval_range(start=0, end=4, periods=6)
+    Out[2]:
+    IntervalIndex([(0, 1], (1, 2], (2, 3]]
+                  closed='right',
+                  dtype='interval[int64]')
+
+    In [3]: pd.period_range(start='2017Q1', end='2017Q4', periods=6, freq='Q')
+    Out[3]: PeriodIndex(['2017Q1', '2017Q2', '2017Q3', '2017Q4', '2018Q1', '2018Q2'], dtype='period[Q-DEC]', freq='Q-DEC')
+
+New Behavior:
+
+.. code-block:: ipython
+
+    In [2]: pd.interval_range(start=0, end=4, periods=6)
+    ---------------------------------------------------------------------------
+    ValueError: Of the three parameters: start, end, and periods, exactly two must be specified
+
+    In [3]: pd.period_range(start='2017Q1', end='2017Q4', periods=6, freq='Q')
+    ---------------------------------------------------------------------------
+    ValueError: Of the three parameters: start, end, and periods, exactly two must be specified
+
+Additionally, the endpoint parameter ``end`` was not included in the intervals
+produced by ``interval_range``. However, all other range functions include
+``end`` in their output. To promote consistency among the range functions,
+``interval_range`` will now include ``end`` as the right endpoint of the final
+interval, except if ``freq`` is specified in a way which skips ``end``.
+
+Previous Behavior:
+
+.. code-block:: ipython
+
+    In [4]: pd.interval_range(start=0, end=4)
+    Out[4]:
+    IntervalIndex([(0, 1], (1, 2], (2, 3]]
+                  closed='right',
+                  dtype='interval[int64]')
+
+
+New Behavior:
+
+.. ipython:: python
+
+    pd.interval_range(start=0, end=4)
+
 .. 
_whatsnew_0210.api: Other API Changes diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 4cfb7547e7d0a..1c8d0b334b91c 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -292,8 +292,8 @@ def __new__(cls, data=None, if is_float(periods): periods = int(periods) elif not is_integer(periods): - raise ValueError('Periods must be a number, got %s' % - str(periods)) + msg = 'periods must be a number, got {periods}' + raise TypeError(msg.format(periods=periods)) if data is None and freq is None: raise ValueError("Must provide freq argument if no data is " @@ -412,7 +412,8 @@ def __new__(cls, data=None, def _generate(cls, start, end, periods, name, offset, tz=None, normalize=False, ambiguous='raise', closed=None): if com._count_not_none(start, end, periods) != 2: - raise ValueError('Must specify two of start, end, or periods') + raise ValueError('Of the three parameters: start, end, and ' + 'periods, exactly two must be specified') _normalized = True @@ -2004,7 +2005,7 @@ def _generate_regular_range(start, end, periods, offset): def date_range(start=None, end=None, periods=None, freq='D', tz=None, normalize=False, name=None, closed=None, **kwargs): """ - Return a fixed frequency datetime index, with day (calendar) as the default + Return a fixed frequency DatetimeIndex, with day (calendar) as the default frequency Parameters @@ -2013,24 +2014,25 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, Left bound for generating dates end : string or datetime-like, default None Right bound for generating dates - periods : integer or None, default None - If None, must specify start and end + periods : integer, default None + Number of periods to generate freq : string or DateOffset, default 'D' (calendar daily) Frequency strings can have multiples, e.g. '5H' - tz : string or None + tz : string, default None Time zone name for returning localized DatetimeIndex, for example Asia/Hong_Kong normalize : bool, default False Normalize start/end dates to midnight before generating date range - name : str, default None - Name of the resulting index - closed : string or None, default None + name : string, default None + Name of the resulting DatetimeIndex + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) Notes ----- - 2 of start, end, or periods must be specified + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. To learn more about the frequency strings, please see `this link `__. @@ -2047,7 +2049,7 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, normalize=True, name=None, closed=None, **kwargs): """ - Return a fixed frequency datetime index, with business day as the default + Return a fixed frequency DatetimeIndex, with business day as the default frequency Parameters @@ -2056,8 +2058,8 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, Left bound for generating dates end : string or datetime-like, default None Right bound for generating dates - periods : integer or None, default None - If None, must specify start and end + periods : integer, default None + Number of periods to generate freq : string or DateOffset, default 'B' (business daily) Frequency strings can have multiples, e.g. 
'5H' tz : string or None @@ -2065,15 +2067,16 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, Asia/Beijing normalize : bool, default False Normalize start/end dates to midnight before generating date range - name : str, default None - Name for the resulting index - closed : string or None, default None + name : string, default None + Name of the resulting DatetimeIndex + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) Notes ----- - 2 of start, end, or periods must be specified + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. To learn more about the frequency strings, please see `this link `__. @@ -2091,7 +2094,7 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, normalize=True, name=None, closed=None, **kwargs): """ - **EXPERIMENTAL** Return a fixed frequency datetime index, with + **EXPERIMENTAL** Return a fixed frequency DatetimeIndex, with CustomBusinessDay as the default frequency .. warning:: EXPERIMENTAL @@ -2105,29 +2108,30 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, Left bound for generating dates end : string or datetime-like, default None Right bound for generating dates - periods : integer or None, default None - If None, must specify start and end + periods : integer, default None + Number of periods to generate freq : string or DateOffset, default 'C' (CustomBusinessDay) Frequency strings can have multiples, e.g. '5H' - tz : string or None + tz : string, default None Time zone name for returning localized DatetimeIndex, for example Asia/Beijing normalize : bool, default False Normalize start/end dates to midnight before generating date range - name : str, default None - Name for the resulting index - weekmask : str, Default 'Mon Tue Wed Thu Fri' + name : string, default None + Name of the resulting DatetimeIndex + weekmask : string, Default 'Mon Tue Wed Thu Fri' weekmask of valid business days, passed to ``numpy.busdaycalendar`` holidays : list list/array of dates to exclude from the set of valid business days, passed to ``numpy.busdaycalendar`` - closed : string or None, default None + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) Notes ----- - 2 of start, end, or periods must be specified + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. To learn more about the frequency strings, please see `this link `__. 
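The reworded docstrings above all document the same contract: of ``start``, ``end``,
and ``periods``, exactly two must be supplied, and the third is inferred. A short
illustration of that contract (a sketch assuming a build with this patch; the
commented output is indicative):

    import pandas as pd

    # start + periods: the end point follows from the default 'D' frequency
    pd.date_range(start='2017-01-01', periods=3)
    # DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'],
    #               dtype='datetime64[ns]', freq='D')

    # end + periods counts backwards from the right endpoint
    pd.bdate_range(end='2017-01-06', periods=3)

    # supplying fewer (or more) than two of the three raises
    try:
        pd.date_range(start='2017-01-01')
    except ValueError as err:
        print(err)
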
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index e0ed6c7ea35c0..6e80f6c900386 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -15,6 +15,8 @@ is_float_dtype, is_interval_dtype, is_scalar, + is_float, + is_number, is_integer) from pandas.core.indexes.base import ( Index, _ensure_index, @@ -25,11 +27,15 @@ Interval, IntervalMixin, IntervalTree, intervals_to_interval_bounds) +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.indexes.multi import MultiIndex from pandas.compat.numpy import function as nv from pandas.core import common as com from pandas.util._decorators import cache_readonly, Appender from pandas.core.config import get_option +from pandas.tseries.frequencies import to_offset +from pandas.tseries.offsets import DateOffset import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -1028,54 +1034,152 @@ def func(self, other): IntervalIndex._add_logical_methods_disabled() -def interval_range(start=None, end=None, freq=None, periods=None, - name=None, closed='right', **kwargs): +def _is_valid_endpoint(endpoint): + """helper for interval_range to check if start/end are valid types""" + return any([is_number(endpoint), + isinstance(endpoint, Timestamp), + isinstance(endpoint, Timedelta), + endpoint is None]) + + +def _is_type_compatible(a, b): + """helper for interval_range to check type compat of start/end/freq""" + is_ts_compat = lambda x: isinstance(x, (Timestamp, DateOffset)) + is_td_compat = lambda x: isinstance(x, (Timedelta, DateOffset)) + return ((is_number(a) and is_number(b)) or + (is_ts_compat(a) and is_ts_compat(b)) or + (is_td_compat(a) and is_td_compat(b)) or + com._any_none(a, b)) + + +def interval_range(start=None, end=None, periods=None, freq=None, + name=None, closed='right'): """ Return a fixed frequency IntervalIndex Parameters ---------- - start : string or datetime-like, default None - Left bound for generating data - end : string or datetime-like, default None - Right bound for generating data - freq : interger, string or DateOffset, default 1 - periods : interger, default None - name : str, default None - Name of the resulting index + start : numeric or datetime-like, default None + Left bound for generating intervals + end : numeric or datetime-like, default None + Right bound for generating intervals + periods : integer, default None + Number of periods to generate + freq : numeric, string, or DateOffset, default None + The length of each interval. Must be consistent with the type of start + and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 + for numeric and 'D' (calendar daily) for datetime-like. + name : string, default None + Name of the resulting IntervalIndex closed : string, default 'right' options are: 'left', 'right', 'both', 'neither' Notes ----- - 2 of start, end, or periods must be specified + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. Returns ------- rng : IntervalIndex + + Examples + -------- + + Numeric ``start`` and ``end`` is supported. + + >>> pd.interval_range(start=0, end=5) + IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]] + closed='right', dtype='interval[int64]') + + Additionally, datetime-like input is also supported. 
+ + >>> pd.interval_range(start='2017-01-01', end='2017-01-04') + IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], + (2017-01-03, 2017-01-04]] + closed='right', dtype='interval[datetime64[ns]]') + + The ``freq`` parameter specifies the frequency between the left and right endpoints of the individual intervals within the ``IntervalIndex``. For + numeric ``start`` and ``end``, the frequency must also be numeric. + + >>> pd.interval_range(start=0, periods=4, freq=1.5) + IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]] + closed='right', dtype='interval[float64]') + + Similarly, for datetime-like ``start`` and ``end``, the frequency must be + convertible to a DateOffset. + + >>> pd.interval_range(start='2017-01-01', periods=3, freq='MS') + IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], + (2017-03-01, 2017-04-01]] + closed='right', dtype='interval[datetime64[ns]]') + + The ``closed`` parameter specifies which endpoints of the individual + intervals within the ``IntervalIndex`` are closed. + + >>> pd.interval_range(end=5, periods=4, closed='both') + IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]] + closed='both', dtype='interval[int64]') """ + if com._count_not_none(start, end, periods) != 2: + raise ValueError('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + + start = com._maybe_box_datetimelike(start) + end = com._maybe_box_datetimelike(end) + endpoint = next(com._not_none(start, end)) + + if not _is_valid_endpoint(start): + msg = 'start must be numeric or datetime-like, got {start}' + raise ValueError(msg.format(start=start)) + + if not _is_valid_endpoint(end): + msg = 'end must be numeric or datetime-like, got {end}' + raise ValueError(msg.format(end=end)) + + if is_float(periods): + periods = int(periods) + elif not is_integer(periods) and periods is not None: + msg = 'periods must be a number, got {periods}' + raise TypeError(msg.format(periods=periods)) + + freq = freq or (1 if is_number(endpoint) else 'D') + if not is_number(freq): + try: + freq = to_offset(freq) + except ValueError: + raise ValueError('freq must be numeric or convertible to ' + 'DateOffset, got {freq}'.format(freq=freq)) - if freq is None: - freq = 1 + # verify type compatibility + if not all([_is_type_compatible(start, end), + _is_type_compatible(start, freq), + _is_type_compatible(end, freq)]): + raise TypeError("start, end, freq need to be type compatible") - if start is None: - if periods is None or end is None: - raise ValueError("must specify 2 of start, end, periods") - start = end - periods * freq - if end is None: - if periods is None or start is None: - raise ValueError("must specify 2 of start, end, periods") + if is_number(endpoint): + if periods is None: + periods = int((end - start) // freq) + + if start is None: + start = end - periods * freq + + # force end to be consistent with freq (lower if freq skips over end) end = start + periods * freq - if periods is None: - if start is None or end is None: - raise ValueError("must specify 2 of start, end, periods") - pass - - # must all be same units or None - arr = np.array([start, end, freq]) - if is_object_dtype(arr): - raise ValueError("start, end, freq need to be the same type") - - return IntervalIndex.from_breaks(np.arange(start, end, freq), - name=name, - closed=closed) + + # end + freq for inclusive endpoint + breaks = np.arange(start, end + freq, freq) + elif isinstance(endpoint, Timestamp): + # add one to account for interval endpoints (n breaks = n-1 
intervals) + if periods is not None: + periods += 1 + breaks = date_range(start=start, end=end, periods=periods, freq=freq) + else: + # add one to account for interval endpoints (n breaks = n-1 intervals) + if periods is not None: + periods += 1 + breaks = timedelta_range(start=start, end=end, periods=periods, + freq=freq) + + return IntervalIndex.from_breaks(breaks, name=name, closed=closed) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 0915462d4d421..fb47d1db48610 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -199,8 +199,8 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, if is_float(periods): periods = int(periods) elif not is_integer(periods): - raise ValueError('Periods must be a number, got %s' % - str(periods)) + msg = 'periods must be a number, got {periods}' + raise TypeError(msg.format(periods=periods)) if name is None and hasattr(data, 'name'): name = data.name @@ -1051,8 +1051,9 @@ def tz_localize(self, tz, infer_dst=False): def _get_ordinal_range(start, end, periods, freq, mult=1): - if com._count_not_none(start, end, periods) < 2: - raise ValueError('Must specify 2 of start, end, periods') + if com._count_not_none(start, end, periods) != 2: + raise ValueError('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') if freq is not None: _, mult = _gfc(freq) @@ -1066,9 +1067,9 @@ def _get_ordinal_range(start, end, periods, freq, mult=1): is_end_per = isinstance(end, Period) if is_start_per and is_end_per and start.freq != end.freq: - raise ValueError('Start and end must have same freq') + raise ValueError('start and end must have same freq') if (start is tslib.NaT or end is tslib.NaT): - raise ValueError('Start and end must not be NaT') + raise ValueError('start and end must not be NaT') if freq is None: if is_start_per: @@ -1157,24 +1158,55 @@ def pnow(freq=None): def period_range(start=None, end=None, periods=None, freq='D', name=None): """ - Return a fixed frequency datetime index, with day (calendar) as the default + Return a fixed frequency PeriodIndex, with day (calendar) as the default frequency - Parameters ---------- - start : starting value, period-like, optional - end : ending value, period-like, optional - periods : int, default None - Number of periods in the index - freq : str/DateOffset, default 'D' + start : string or period-like, default None + Left bound for generating periods + end : string or period-like, default None + Right bound for generating periods + periods : integer, default None + Number of periods to generate + freq : string or DateOffset, default 'D' (calendar daily) Frequency alias - name : str, default None - Name for the resulting PeriodIndex + name : string, default None + Name of the resulting PeriodIndex + + Notes + ----- + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. + + To learn more about the frequency strings, please see `this link + `__. Returns ------- prng : PeriodIndex + + Examples + -------- + + >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M') + PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', + '2017-06', '2017-07', '2017-08', '2017-09', + '2017-10', '2017-11', '2017-12', '2018-01'], + dtype='period[M]', freq='M') + + If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor + endpoints for a ``PeriodIndex`` with frequency matching that of the + ``period_range`` constructor. 
+ + >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'), + ... end=pd.Period('2017Q2', freq='Q'), freq='M') + PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'], + dtype='period[M]', freq='M') """ + if com._count_not_none(start, end, periods) != 2: + raise ValueError('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + return PeriodIndex(start=start, end=end, periods=periods, freq=freq, name=name) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 2823951c0f348..d7b7d56d74a3a 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -180,8 +180,8 @@ def __new__(cls, data=None, unit=None, if is_float(periods): periods = int(periods) elif not is_integer(periods): - raise ValueError('Periods must be a number, got %s' % - str(periods)) + msg = 'periods must be a number, got {periods}' + raise TypeError(msg.format(periods=periods)) if data is None and freq is None: raise ValueError("Must provide freq argument if no data is " @@ -234,7 +234,8 @@ def __new__(cls, data=None, unit=None, @classmethod def _generate(cls, start, end, periods, name, offset, closed=None): if com._count_not_none(start, end, periods) != 2: - raise ValueError('Must specify two of start, end, or periods') + raise ValueError('Of the three parameters: start, end, and ' + 'periods, exactly two must be specified') if start is not None: start = Timedelta(start) @@ -960,22 +961,22 @@ def _generate_regular_range(start, end, periods, offset): def timedelta_range(start=None, end=None, periods=None, freq='D', name=None, closed=None): """ - Return a fixed frequency timedelta index, with day as the default + Return a fixed frequency TimedeltaIndex, with day as the default frequency Parameters ---------- start : string or timedelta-like, default None - Left bound for generating dates - end : string or datetime-like, default None - Right bound for generating dates - periods : integer or None, default None - If None, must specify start and end + Left bound for generating timedeltas + end : string or timedelta-like, default None + Right bound for generating timedeltas + periods : integer, default None + Number of periods to generate freq : string or DateOffset, default 'D' (calendar daily) Frequency strings can have multiples, e.g. '5H' - name : str, default None - Name of the resulting index - closed : string or None, default None + name : string, default None + Name of the resulting TimedeltaIndex + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) @@ -985,11 +986,34 @@ def timedelta_range(start=None, end=None, periods=None, freq='D', Notes ----- - 2 of start, end, or periods must be specified. + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. To learn more about the frequency strings, please see `this link `__. + + Examples + -------- + + >>> pd.timedelta_range(start='1 day', periods=4) + TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq='D') + + The ``closed`` parameter specifies which endpoint is included. The default + behavior is to include both endpoints. + + >>> pd.timedelta_range(start='1 day', periods=4, closed='right') + TimedeltaIndex(['2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq='D') + + The ``freq`` parameter specifies the frequency of the TimedeltaIndex. 
+ Only fixed frequencies can be passed; non-fixed frequencies such as + 'M' (month end) will raise. + + >>> pd.timedelta_range(start='1 day', end='2 days', freq='6H') + TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00', + '1 days 18:00:00', '2 days 00:00:00'], + dtype='timedelta64[ns]', freq='6H') """ return TimedeltaIndex(start=start, end=end, periods=periods, - freq=freq, name=name, - closed=closed) + freq=freq, name=name, closed=closed) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index cf896b06130a2..a4706dd8a3767 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -307,8 +307,9 @@ def test_constructor_coverage(self): exp = date_range('1/1/2000', periods=10) tm.assert_index_equal(rng, exp) - pytest.raises(ValueError, DatetimeIndex, start='1/1/2000', - periods='foo', freq='D') + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + DatetimeIndex(start='1/1/2000', periods='foo', freq='D') pytest.raises(ValueError, DatetimeIndex, start='1/1/2000', end='1/10/2000') diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index da4ca83c10dda..8d86bebdd4d5e 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -107,8 +107,10 @@ def test_date_range_ambiguous_arguments(self): start = datetime(2011, 1, 1, 5, 3, 40) end = datetime(2011, 1, 1, 8, 9, 40) - pytest.raises(ValueError, date_range, start, end, freq='s', - periods=10) + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + with tm.assert_raises_regex(ValueError, msg): + date_range(start, end, periods=10, freq='s') def test_date_range_businesshour(self): idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00', @@ -146,14 +148,29 @@ def test_date_range_businesshour(self): def test_range_misspecified(self): # GH #1095 + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + + with tm.assert_raises_regex(ValueError, msg): + date_range(start='1/1/2000') + + with tm.assert_raises_regex(ValueError, msg): + date_range(end='1/1/2000') + + with tm.assert_raises_regex(ValueError, msg): + date_range(periods=10) + + with tm.assert_raises_regex(ValueError, msg): + date_range(start='1/1/2000', freq='H') - pytest.raises(ValueError, date_range, '1/1/2000') - pytest.raises(ValueError, date_range, end='1/1/2000') - pytest.raises(ValueError, date_range, periods=10) + with tm.assert_raises_regex(ValueError, msg): + date_range(end='1/1/2000', freq='H') - pytest.raises(ValueError, date_range, '1/1/2000', freq='H') - pytest.raises(ValueError, date_range, end='1/1/2000', freq='H') - pytest.raises(ValueError, date_range, periods=10, freq='H') + with tm.assert_raises_regex(ValueError, msg): + date_range(periods=10, freq='H') + + with tm.assert_raises_regex(ValueError, msg): + date_range() def test_compat_replace(self): # https://github.com/statsmodels/statsmodels/issues/3349 @@ -231,8 +248,13 @@ def test_constructor(self): bdate_range(START, END, freq=BDay()) bdate_range(START, periods=20, freq=BDay()) bdate_range(end=START, periods=20, freq=BDay()) - pytest.raises(ValueError, date_range, '2011-1-1', '2012-1-1', 'B') - pytest.raises(ValueError, bdate_range, '2011-1-1', '2012-1-1', 'B') + + msg = 'periods must be a number, got B' 
+ with tm.assert_raises_regex(TypeError, msg): + date_range('2011-1-1', '2012-1-1', 'B') + + with tm.assert_raises_regex(TypeError, msg): + bdate_range('2011-1-1', '2012-1-1', 'B') def test_naive_aware_conflicts(self): naive = bdate_range(START, END, freq=BDay(), tz=None) @@ -510,8 +532,13 @@ def test_constructor(self): cdate_range(START, END, freq=CDay()) cdate_range(START, periods=20, freq=CDay()) cdate_range(end=START, periods=20, freq=CDay()) - pytest.raises(ValueError, date_range, '2011-1-1', '2012-1-1', 'C') - pytest.raises(ValueError, cdate_range, '2011-1-1', '2012-1-1', 'C') + + msg = 'periods must be a number, got C' + with tm.assert_raises_regex(TypeError, msg): + date_range('2011-1-1', '2012-1-1', 'C') + + with tm.assert_raises_regex(TypeError, msg): + cdate_range('2011-1-1', '2012-1-1', 'C') def test_cached_range(self): DatetimeIndex._cached_range(START, END, offset=CDay()) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index e5b889e100307..639a9272c3808 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -436,11 +436,12 @@ def test_constructor_error(self): start = Period('02-Apr-2005', 'B') end_intv = Period('2006-12-31', ('w', 1)) - msg = 'Start and end must have same freq' + msg = 'start and end must have same freq' with tm.assert_raises_regex(ValueError, msg): PeriodIndex(start=start, end=end_intv) - msg = 'Must specify 2 of start, end, periods' + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') with tm.assert_raises_regex(ValueError, msg): PeriodIndex(start=start) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py new file mode 100644 index 0000000000000..640f24f67f72f --- /dev/null +++ b/pandas/tests/indexes/period/test_period_range.py @@ -0,0 +1,94 @@ +import pytest +import pandas.util.testing as tm +from pandas import date_range, NaT, period_range, Period, PeriodIndex + + +class TestPeriodRange(object): + + @pytest.mark.parametrize('freq', ['D', 'W', 'M', 'Q', 'A']) + def test_construction_from_string(self, freq): + # non-empty + expected = date_range(start='2017-01-01', periods=5, + freq=freq, name='foo').to_period() + start, end = str(expected[0]), str(expected[-1]) + + result = period_range(start=start, end=end, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(start=start, periods=5, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=5, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + # empty + expected = PeriodIndex([], freq=freq, name='foo') + + result = period_range(start=start, periods=0, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=0, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(start=end, end=start, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + def test_construction_from_period(self): + # upsampling + start, end = Period('2017Q1', freq='Q'), Period('2018Q1', freq='Q') + expected = date_range(start='2017-03-31', end='2018-03-31', freq='M', + name='foo').to_period() + result = period_range(start=start, end=end, freq='M', name='foo') + tm.assert_index_equal(result, expected) + + # downsampling + start, end = Period('2017-1', freq='M'), Period('2019-12', freq='M') + expected = 
date_range(start='2017-01-31', end='2019-12-31', freq='Q', + name='foo').to_period() + result = period_range(start=start, end=end, freq='Q', name='foo') + tm.assert_index_equal(result, expected) + + # empty + expected = PeriodIndex([], freq='W', name='foo') + + result = period_range(start=start, periods=0, freq='W', name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=0, freq='W', name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(start=end, end=start, freq='W', name='foo') + tm.assert_index_equal(result, expected) + + def test_errors(self): + # not enough params + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + with tm.assert_raises_regex(ValueError, msg): + period_range(start='2017Q1') + + with tm.assert_raises_regex(ValueError, msg): + period_range(end='2017Q1') + + with tm.assert_raises_regex(ValueError, msg): + period_range(periods=5) + + with tm.assert_raises_regex(ValueError, msg): + period_range() + + # too many params + with tm.assert_raises_regex(ValueError, msg): + period_range(start='2017Q1', end='2018Q1', periods=8, freq='Q') + + # start/end NaT + msg = 'start and end must not be NaT' + with tm.assert_raises_regex(ValueError, msg): + period_range(start=NaT, end='2018Q1') + + with tm.assert_raises_regex(ValueError, msg): + period_range(start='2017Q1', end=NaT) + + # invalid periods param + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + period_range(start='2017Q1', periods='foo') diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 18eefc3fbdca6..13c3b35e4d85d 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -2,10 +2,11 @@ import pytest import numpy as np - +from datetime import timedelta from pandas import (Interval, IntervalIndex, Index, isna, interval_range, Timestamp, Timedelta, - compat) + compat, date_range, timedelta_range, DateOffset) +from pandas.tseries.offsets import Day from pandas._libs.interval import IntervalTree from pandas.tests.indexes.common import Base import pandas.util.testing as tm @@ -721,40 +722,278 @@ def test_is_non_overlapping_monotonic(self): class TestIntervalRange(object): - def test_construction(self): - result = interval_range(0, 5, name='foo', closed='both') + @pytest.mark.parametrize('closed', ['left', 'right', 'neither', 'both']) + def test_construction_from_numeric(self, closed): + # combinations of start/end/periods without freq expected = IntervalIndex.from_breaks( - np.arange(0, 5), name='foo', closed='both') + np.arange(0, 6), name='foo', closed=closed) + + result = interval_range(start=0, end=5, name='foo', closed=closed) tm.assert_index_equal(result, expected) - def test_errors(self): + result = interval_range(start=0, periods=5, name='foo', closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=5, periods=5, name='foo', closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with freq + expected = IntervalIndex.from_tuples([(0, 2), (2, 4), (4, 6)], + name='foo', closed=closed) + + result = interval_range(start=0, end=6, freq=2, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=0, periods=3, freq=2, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=6, periods=3, freq=2, name='foo', + closed=closed) + 
tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. + expected = IntervalIndex.from_tuples([(0.0, 1.5), (1.5, 3.0)], + name='foo', closed=closed) + result = interval_range(start=0, end=4, freq=1.5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('closed', ['left', 'right', 'neither', 'both']) + def test_construction_from_timestamp(self, closed): + # combinations of start/end/periods without freq + start, end = Timestamp('2017-01-01'), Timestamp('2017-01-06') + breaks = date_range(start=start, end=end) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with fixed freq + freq = '2D' + start, end = Timestamp('2017-01-01'), Timestamp('2017-01-07') + breaks = date_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=3, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=3, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. + end = Timestamp('2017-01-08') + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with non-fixed freq + freq = 'M' + start, end = Timestamp('2017-01-01'), Timestamp('2017-12-31') + breaks = date_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=11, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=11, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. 
+ end = Timestamp('2018-01-15') + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('closed', ['left', 'right', 'neither', 'both']) + def test_construction_from_timedelta(self, closed): + # combinations of start/end/periods without freq + start, end = Timedelta('1 day'), Timedelta('6 days') + breaks = timedelta_range(start=start, end=end) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with fixed freq + freq = '2D' + start, end = Timedelta('1 day'), Timedelta('7 days') + breaks = timedelta_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=3, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=3, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. + end = Timedelta('7 days 1 hour') + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + def test_constructor_coverage(self): + # float value for periods + expected = pd.interval_range(start=0, periods=10) + result = pd.interval_range(start=0, periods=10.5) + tm.assert_index_equal(result, expected) + + # equivalent timestamp-like start/end + start, end = Timestamp('2017-01-01'), Timestamp('2017-01-15') + expected = pd.interval_range(start=start, end=end) + + result = pd.interval_range(start=start.to_pydatetime(), + end=end.to_pydatetime()) + tm.assert_index_equal(result, expected) + + result = pd.interval_range(start=start.tz_localize('UTC'), + end=end.tz_localize('UTC')) + tm.assert_index_equal(result, expected) + + result = pd.interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timestamp + equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1), + DateOffset(days=1)] + for freq in equiv_freq: + result = pd.interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + + # equivalent timedelta-like start/end + start, end = Timedelta(days=1), Timedelta(days=10) + expected = pd.interval_range(start=start, end=end) + + result = pd.interval_range(start=start.to_pytimedelta(), + end=end.to_pytimedelta()) + tm.assert_index_equal(result, expected) + + result = pd.interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timedelta + equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1)] + for freq in equiv_freq: + result = pd.interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + def test_errors(self): # not enough params - def f(): - interval_range(0) + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be 
specified') - pytest.raises(ValueError, f) + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=0) - def f(): - interval_range(periods=2) + with tm.assert_raises_regex(ValueError, msg): + interval_range(end=5) - pytest.raises(ValueError, f) + with tm.assert_raises_regex(ValueError, msg): + interval_range(periods=2) - def f(): + with tm.assert_raises_regex(ValueError, msg): interval_range() - pytest.raises(ValueError, f) + # too many params + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=0, end=5, periods=6) # mixed units - def f(): - interval_range(0, Timestamp('20130101'), freq=2) + msg = 'start, end, freq need to be type compatible' + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=Timestamp('20130101'), freq=2) - pytest.raises(ValueError, f) + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=Timedelta('1 day'), freq=2) - def f(): - interval_range(0, 10, freq=Timedelta('1day')) + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=10, freq='D') - pytest.raises(ValueError, f) + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), end=10, freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), + end=Timedelta('1 day'), freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), + end=Timestamp('20130110'), freq=2) + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), end=10, freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), + end=Timestamp('20130110'), freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), + end=Timedelta('10 days'), freq=2) + + # invalid periods + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, periods='foo') + + # invalid start + msg = 'start must be numeric or datetime-like, got foo' + with tm.assert_raises_regex(ValueError, msg): + interval_range(start='foo', periods=10) + + # invalid end + msg = 'end must be numeric or datetime-like, got \(0, 1\]' + with tm.assert_raises_regex(ValueError, msg): + interval_range(end=Interval(0, 1), periods=10) + + # invalid freq for datetime-like + msg = 'freq must be numeric or convertible to DateOffset, got foo' + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=0, end=10, freq='foo') + + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=Timestamp('20130101'), periods=10, freq='foo') + + with tm.assert_raises_regex(ValueError, msg): + interval_range(end=Timedelta('1 day'), periods=10, freq='foo') class TestIntervalTree(object): diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index dd25e2cca2e55..70aadd9f57174 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -50,8 +50,9 @@ def test_constructor_coverage(self): exp = timedelta_range('1 days', periods=10) tm.assert_index_equal(rng, exp) - pytest.raises(ValueError, TimedeltaIndex, start='1 days', - periods='foo', freq='D') + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + TimedeltaIndex(start='1 days', periods='foo', freq='D') pytest.raises(ValueError, TimedeltaIndex, start='1 days', end='10 days') diff --git 
a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 4732a0ce110de..7624e1f79af15 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -1,5 +1,4 @@ import numpy as np - import pandas as pd import pandas.util.testing as tm from pandas.tseries.offsets import Day, Second @@ -49,3 +48,23 @@ def test_timedelta_range(self): expected = df.loc[pd.Timedelta('0s'):, :] result = df.loc['0s':, :] assert_frame_equal(expected, result) + + def test_errors(self): + # not enough params + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + with tm.assert_raises_regex(ValueError, msg): + timedelta_range(start='0 days') + + with tm.assert_raises_regex(ValueError, msg): + timedelta_range(end='5 days') + + with tm.assert_raises_regex(ValueError, msg): + timedelta_range(periods=2) + + with tm.assert_raises_regex(ValueError, msg): + timedelta_range() + + # too many params + with tm.assert_raises_regex(ValueError, msg): + timedelta_range(start='0 days', end='5 days', periods=10) From 97abd2c9c11aeee0e3d2c58a74d85fa75062ca1f Mon Sep 17 00:00:00 2001 From: Kirk Hansen Date: Thu, 14 Sep 2017 05:14:43 -0500 Subject: [PATCH 46/57] TST: Made s3 related tests mock boto (#17388) --- appveyor.yml | 6 ++ ci/install_circle.sh | 1 + ci/install_travis.sh | 2 +- ci/requirements-2.7_WIN.pip | 0 ci/requirements-3.6_NUMPY_DEV.pip | 0 ci/requirements-3.6_WIN.pip | 0 ci/requirements_dev.txt | 1 + pandas/tests/io/parser/data/tips.csv.bz2 | Bin 0 -> 1316 bytes pandas/tests/io/parser/data/tips.csv.gz | Bin 0 -> 1740 bytes pandas/tests/io/parser/test_network.py | 100 ++++++++++++++--------- pandas/tests/io/test_excel.py | 58 ++++++------- tox.ini | 1 + 12 files changed, 102 insertions(+), 67 deletions(-) create mode 100644 ci/requirements-2.7_WIN.pip create mode 100644 ci/requirements-3.6_NUMPY_DEV.pip create mode 100644 ci/requirements-3.6_WIN.pip create mode 100644 pandas/tests/io/parser/data/tips.csv.bz2 create mode 100644 pandas/tests/io/parser/data/tips.csv.gz diff --git a/appveyor.yml b/appveyor.yml index 65e62f887554e..a1f8886f6d068 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -74,12 +74,18 @@ install: # create our env - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest>=3.1.0 pytest-xdist - cmd: activate pandas + - cmd: pip install moto - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run - cmd: echo "installing requirements from %REQ%" - cmd: conda install -n pandas --file=%REQ% - cmd: conda list -n pandas - cmd: echo "installing requirements from %REQ% - done" + # add some pip only reqs to the env + - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.pip + - cmd: echo "installing requirements from %REQ%" + - cmd: pip install -Ur %REQ% + # build em using the local source checkout in the correct windows env - cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace' diff --git a/ci/install_circle.sh b/ci/install_circle.sh index 29ca69970104b..fd79f907625e9 100755 --- a/ci/install_circle.sh +++ b/ci/install_circle.sh @@ -67,6 +67,7 @@ time conda create -n pandas -q --file=${REQ_BUILD} || exit 1 time conda install -n pandas pytest>=3.1.0 || exit 1 source activate pandas +time pip install moto || exit 1 # build but don't install echo "[build em]" diff --git a/ci/install_travis.sh b/ci/install_travis.sh index d26689f2e6b4b..b85263daa1eac 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -104,7 +104,7 @@ if [ -e ${REQ} 
]; then fi time conda install -n pandas pytest>=3.1.0 -time pip install pytest-xdist +time pip install pytest-xdist moto if [ "$LINT" ]; then conda install flake8 diff --git a/ci/requirements-2.7_WIN.pip b/ci/requirements-2.7_WIN.pip new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/ci/requirements-3.6_NUMPY_DEV.pip b/ci/requirements-3.6_NUMPY_DEV.pip new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/ci/requirements-3.6_WIN.pip b/ci/requirements-3.6_WIN.pip new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt index c7190c506ba18..dbc4f6cbd6509 100644 --- a/ci/requirements_dev.txt +++ b/ci/requirements_dev.txt @@ -5,3 +5,4 @@ cython pytest>=3.1.0 pytest-cov flake8 +moto diff --git a/pandas/tests/io/parser/data/tips.csv.bz2 b/pandas/tests/io/parser/data/tips.csv.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..1452896b05e9d41f58ffd816a0459d86796718a6 GIT binary patch literal 1316 zcmV+<1>5>UT4*^jL0KkKS@WgHUjPpp-+%xR00n>G1qTcuzHi=eU zr8L5NfM@_34^ia=nn@4@ngc)pXm>4ki*@VR?6|SRoF#LZ+TkL$)Z)}c<#mBig_KMX zruJeOi&bv;V=*04xP@hDQp(ibF*2pqxW%nuMr@F6Gix?+fsH|aKayy7UwGa_-`dVs zYfM$)R7$k8wpC6gfmM#M!-v|)iP#1h4cPkh|rkJNTD3*02| zUew#%bX<$c*~vCvMH>_%oV^S&6a+#ukskADG3ECrBRBE^v4aChy? zvDazQUv(jtyOFJd%+RitVq;Fo?$ru4tx8y4RWLAw3OQ&r5YZ6QA(|s=%EqEnNvFyDucBxbJ63X0f6|L)lrAb?vZoDHd%^>qwTK z8M-E+R_N`PibFFSF!cCl2Z7}>xeJ`*<3&DX2?dNalnbN*vYZ7QTLis}+CyTbyv{>s zl!hm_!_I4KZE}>uSzBr=*www83fCT-SPZ&+p@dCkFG(R6{D)ETHdAf-8>fnW#-GXdM4pE5VK!{hIp z4{*7H7hK39V*E6-z)7yKmA;#^4 z#PVN7@@@mJL*EhAX#`mH2SAk2lkhNXJBL>BHS&`^r&JS)>z58UjoYiOCqY*zmz*K6 z1SFlk-!Cn`6liVaz=_bPhSWpu1LJ>%Cxlk3T;w2WIQ0LRX3%vrxUPW z8d$X$uIXc_sI{9kN=EXFie6i&h29y!AZcb)r??rFOLu%3R3P<2gpt$oRe1O6gk~8T zu3j+kM{M-PhPbG60sxBGP*RgE)NL!@Yr%+f=+n7l@JL0;84IYj5yo31-0M)BHp<)Q zzkK_6UA}%i|M3mU6cFV&C+q8L8zqA-)xv!>^z@7=Fgi9q_iLEzwg+!G2w0Ts9jf*M z64F>g8RrtB4m-(FnM=?v>|@tRdI1$7H2kMsssN5^GU(*!z`p{ft@Qr;@_OlzdPSq# z=N&m=z8R{dV?dV-Iwe>fL1(0h{JJ}+<6sZ(@ePlLCs;FVmX?rYPxs1DA(^whpU+gQLdb{bOK!0;_ zkQW*TzXUDj{aqJ}zCZT`AFw?MCRq$YLmUun3sPt|TJ|F1y1->qh6EwxZc5srUOK?6 zfIOA24Gq;xs91xZWkXI-kgFkpK@VM+dImzp9WY2eRlGn`2@#FO*RJOK&vl0mX5&x| zsC*~R>SEi53Wfn0JC1s5&DImTC?CmS%t%KJn8SnJ{vz7Tu;z{(oX1Uj?2r-D=FHLg z#Nx)*tqL1*0`$uskSzVPPI~Zw87JK{kHS;|mjvLPazsSBBGTEE(XeUKcA)Oa1!1&{ ziGd~d!Xgpq$A_L=)+{U2btCFAD_NiGHe#QuSj!mhzmK3jN5V2e#ai_;@D^ZS3^-kH z6guhK*S?INWvhtT8n-^y8%I8HZbrKc2koF=btc|VG&cU-G4a~h=kf7qrTv=Ut%I~S zEXzKRMTs`<+xJ_K%nb(}Ie8d~S$W#@BiccQnPiO(+O^Yd9ou<9tf*;o$=WeUAZqAG zyzyj!F_p;rzPQ?Y92;+@To35Y<=xOSTm>@DJ;}6?*Lzr=TgaG9BIbr{y}$`b72TY! zqYYtgpVJv*bV|eFpvy$Pm>HFtbh_Na_)b19LfLd-0+3QVd;u1iG1e^0tsmq27&c@f zqhD+!jOz~T@n@5$<6yJqL9iFfH0&B9mSe(Zd*O_H&`()&cv#qX>*83gV@pnS)Uxa6 zh&!W4Kw{zbuyG*bJ30s^kL%1hKc#3Y!TLa1|HGI+q2~|%8;0j+sEAdd#O2^p#_J5{ zqk&o!uGkw*Xq2S)W72nPTLSJR3mF;xQOdr}*By;^C3XK=k7;*$ zylq6O8Vck|96AOM^M;z(GGMh%)?T{?8o*P+jIR3%VPB~S`#)bVj@Hps@zV;k&aoL? zJT_x>_m~9QgT~p5h literal 0 HcmV?d00001 diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index 3344243f8137a..27cc708889fa2 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -4,13 +4,20 @@ Tests parsers ability to read and parse non-local files and hence require a network connection to be read. 
""" - import os + import pytest +import moto import pandas.util.testing as tm from pandas import DataFrame from pandas.io.parsers import read_csv, read_table +from pandas.compat import BytesIO + + +@pytest.fixture(scope='module') +def tips_file(): + return os.path.join(tm.get_data_path(), 'tips.csv') @pytest.fixture(scope='module') @@ -19,6 +26,40 @@ def salaries_table(): return read_table(path) +@pytest.fixture(scope='module') +def s3_resource(tips_file): + pytest.importorskip('s3fs') + moto.mock_s3().start() + + test_s3_files = [ + ('tips.csv', tips_file), + ('tips.csv.gz', tips_file + '.gz'), + ('tips.csv.bz2', tips_file + '.bz2'), + ] + + def add_tips_files(bucket_name): + for s3_key, file_name in test_s3_files: + with open(file_name, 'rb') as f: + conn.Bucket(bucket_name).put_object( + Key=s3_key, + Body=f) + + boto3 = pytest.importorskip('boto3') + # see gh-16135 + bucket = 'pandas-test' + + conn = boto3.resource("s3", region_name="us-east-1") + conn.create_bucket(Bucket=bucket) + add_tips_files(bucket) + + conn.create_bucket(Bucket='cant_get_it', ACL='private') + add_tips_files('cant_get_it') + + yield conn + + moto.mock_s3().stop() + + @pytest.mark.network @pytest.mark.parametrize( "compression,extension", @@ -51,15 +92,11 @@ def check_compressed_urls(salaries_table, compression, extension, mode, class TestS3(object): - - def setup_method(self, method): - try: - import s3fs # noqa - except ImportError: - pytest.skip("s3fs not installed") - @tm.network def test_parse_public_s3_bucket(self): + pytest.importorskip('s3fs') + # more of an integration test due to the not-public contents portion + # can probably mock this though. for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, compression=comp) @@ -74,8 +111,8 @@ def test_parse_public_s3_bucket(self): assert not df.empty tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df) - @tm.network - def test_parse_public_s3n_bucket(self): + def test_parse_public_s3n_bucket(self, s3_resource): + # Read from AWS s3 as "s3n" URL df = read_csv('s3n://pandas-test/tips.csv', nrows=10) assert isinstance(df, DataFrame) @@ -83,8 +120,7 @@ def test_parse_public_s3n_bucket(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) - @tm.network - def test_parse_public_s3a_bucket(self): + def test_parse_public_s3a_bucket(self, s3_resource): # Read from AWS s3 as "s3a" URL df = read_csv('s3a://pandas-test/tips.csv', nrows=10) assert isinstance(df, DataFrame) @@ -92,8 +128,7 @@ def test_parse_public_s3a_bucket(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) - @tm.network - def test_parse_public_s3_bucket_nrows(self): + def test_parse_public_s3_bucket_nrows(self, s3_resource): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, nrows=10, compression=comp) @@ -102,8 +137,7 @@ def test_parse_public_s3_bucket_nrows(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) - @tm.network - def test_parse_public_s3_bucket_chunked(self): + def test_parse_public_s3_bucket_chunked(self, s3_resource): # Read with a chunksize chunksize = 5 local_tips = read_csv(tm.get_data_path('tips.csv')) @@ -121,8 +155,7 @@ def test_parse_public_s3_bucket_chunked(self): chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) - @tm.network - def test_parse_public_s3_bucket_chunked_python(self): + def 
test_parse_public_s3_bucket_chunked_python(self, s3_resource): # Read with a chunksize using the Python parser chunksize = 5 local_tips = read_csv(tm.get_data_path('tips.csv')) @@ -140,8 +173,7 @@ def test_parse_public_s3_bucket_chunked_python(self): chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) - @tm.network - def test_parse_public_s3_bucket_python(self): + def test_parse_public_s3_bucket_python(self, s3_resource): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression=comp) @@ -150,8 +182,7 @@ def test_parse_public_s3_bucket_python(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')), df) - @tm.network - def test_infer_s3_compression(self): + def test_infer_s3_compression(self, s3_resource): for ext in ['', '.gz', '.bz2']: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression='infer') @@ -160,8 +191,7 @@ def test_infer_s3_compression(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')), df) - @tm.network - def test_parse_public_s3_bucket_nrows_python(self): + def test_parse_public_s3_bucket_nrows_python(self, s3_resource): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', nrows=10, compression=comp) @@ -170,8 +200,7 @@ def test_parse_public_s3_bucket_nrows_python(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) - @tm.network - def test_s3_fails(self): + def test_s3_fails(self, s3_resource): with pytest.raises(IOError): read_csv('s3://nyqpug/asdf.csv') @@ -180,21 +209,18 @@ def test_s3_fails(self): with pytest.raises(IOError): read_csv('s3://cant_get_it/') - @tm.network - def boto3_client_s3(self): + def test_read_csv_handles_boto_s3_object(self, + s3_resource, + tips_file): # see gh-16135 - # boto3 is a dependency of s3fs - import boto3 - client = boto3.client("s3") - - key = "/tips.csv" - bucket = "pandas-test" - s3_object = client.get_object(Bucket=bucket, Key=key) + s3_object = s3_resource.meta.client.get_object( + Bucket='pandas-test', + Key='tips.csv') - result = read_csv(s3_object["Body"]) + result = read_csv(BytesIO(s3_object["Body"].read()), encoding='utf8') assert isinstance(result, DataFrame) assert not result.empty - expected = read_csv(tm.get_data_path('tips.csv')) + expected = read_csv(tips_file) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 92147b46097b8..6a399f41975e5 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1,33 +1,32 @@ # pylint: disable=E1101 - -from pandas.compat import u, range, map, openpyxl_compat, BytesIO, iteritems -from datetime import datetime, date, time -import sys +import functools +import operator import os +import sys +import warnings +from datetime import datetime, date, time from distutils.version import LooseVersion from functools import partial - -import warnings from warnings import catch_warnings -import operator -import functools -import pytest -from numpy import nan import numpy as np +import pytest +from numpy import nan +import moto import pandas as pd +import pandas.util.testing as tm from pandas import DataFrame, Index, MultiIndex -from pandas.io.formats.excel import ExcelFormatter -from pandas.io.parsers import read_csv +from pandas.compat import u, range, map, openpyxl_compat, BytesIO, iteritems +from pandas.core.config import 
 set_option, get_option +from pandas.io.common import URLError from pandas.io.excel import ( ExcelFile, ExcelWriter, read_excel, _XlwtWriter, _Openpyxl1Writer, _Openpyxl20Writer, _Openpyxl22Writer, register_writer, _XlsxWriter ) -from pandas.io.common import URLError +from pandas.io.formats.excel import ExcelFormatter +from pandas.io.parsers import read_csv from pandas.util.testing import ensure_clean, makeCustomDataframe as mkdf -from pandas.core.config import set_option, get_option -import pandas.util.testing as tm def _skip_if_no_xlrd(): @@ -67,13 +66,6 @@ def _skip_if_no_excelsuite(): _skip_if_no_openpyxl() -def _skip_if_no_s3fs(): - try: - import s3fs # noqa - except ImportError: - pytest.skip('s3fs not installed, skipping') - - _seriesd = tm.getSeriesData() _tsd = tm.getTimeSeriesData() _frame = DataFrame(_seriesd)[:10] @@ -605,14 +597,22 @@ def test_read_from_http_url(self): local_table = self.get_exceldf('test1') tm.assert_frame_equal(url_table, local_table) - @tm.network(check_before_test=True) def test_read_from_s3_url(self): - _skip_if_no_s3fs() - - url = ('s3://pandas-test/test1' + self.ext) - url_table = read_excel(url) - local_table = self.get_exceldf('test1') - tm.assert_frame_equal(url_table, local_table) + boto3 = pytest.importorskip('boto3') + pytest.importorskip('s3fs') + + with moto.mock_s3(): + conn = boto3.resource("s3", region_name="us-east-1") + conn.create_bucket(Bucket="pandas-test") + file_name = os.path.join(self.dirpath, 'test1' + self.ext) + with open(file_name, 'rb') as f: + conn.Bucket("pandas-test").put_object(Key="test1" + self.ext, + Body=f) + + url = ('s3://pandas-test/test1' + self.ext) + url_table = read_excel(url) + local_table = self.get_exceldf('test1') + tm.assert_frame_equal(url_table, local_table) @pytest.mark.slow def test_read_from_file_url(self): diff --git a/tox.ini b/tox.ini index 45ad7fc451e76..f055251581a93 100644 --- a/tox.ini +++ b/tox.ini @@ -19,6 +19,7 @@ deps = xlrd six sqlalchemy + moto # cd to anything but the default {toxinidir} which # contains the pandas subdirectory and confuses From 0097cb712a7361a69eb4f5ebb9bc13c2b8733f19 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 14 Sep 2017 11:09:30 -0500 Subject: [PATCH 47/57] PERF: Avoid values in Categorical.set_categories (#17515) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Master: ```python In [1]: import pandas as pd; import numpy as np In [2]: arr = ['s%04d' % i for i in np.random.randint(0, 500000 // 10, size=500000)]; s = pd.Series(arr).astype('category') In [3]: %timeit s.cat.set_categories(s.cat.categories) 68.5 ms ± 846 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) ``` HEAD: ```python In [1]: import pandas as pd; import numpy as np In [2]: arr = ['s%04d' % i for i in np.random.randint(0, 500000 // 10, size=500000)] s = pd.Series(arr).astype('category') In [3]: %timeit s.cat.set_categories(s.cat.categories) 7.43 ms ± 110 µs per loop (mean ± std. dev. 
of 7 runs, 100 loops each) ``` Closes https://github.com/pandas-dev/pandas/issues/17508 --- asv_bench/benchmarks/categoricals.py | 3 ++ doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/categorical.py | 37 ++++++++++++++++- pandas/core/dtypes/concat.py | 11 ++--- pandas/tests/test_categorical.py | 62 ++++++++++++++++++++++++++++ 5 files changed, 104 insertions(+), 10 deletions(-) diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 6432ccfb19efe..d90c994b3d194 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -67,6 +67,9 @@ def time_value_counts_dropna(self): def time_rendering(self): str(self.sel) + def time_set_categories(self): + self.ts.cat.set_categories(self.ts.cat.categories[::2]) + class Categoricals3(object): goal_time = 0.2 diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 939199d3f6fa6..6495ad3e7f6ad 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -467,6 +467,7 @@ Performance Improvements - Improved performance of instantiating :class:`SparseDataFrame` (:issue:`16773`) - :attr:`Series.dt` no longer performs frequency inference, yielding a large speedup when accessing the attribute (:issue:`17210`) +- Improved performance of :meth:`Categorical.set_categories` by not materializing the values (:issue:`17508`) - :attr:`Timestamp.microsecond` no longer re-computes on attribute access (:issue:`17331`) .. _whatsnew_0210.bug_fixes: diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 97df72900428c..e67ce2936819f 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -777,8 +777,9 @@ def set_categories(self, new_categories, ordered=None, rename=False, # remove all _codes which are larger and set to -1/NaN self._codes[self._codes >= len(new_categories)] = -1 else: - values = cat.__array__() - cat._codes = _get_codes_for_values(values, new_categories) + codes = _recode_for_categories(self.codes, self.categories, + new_categories) + cat._codes = codes cat._categories = new_categories if ordered is None: @@ -2113,6 +2114,38 @@ def _get_codes_for_values(values, categories): return coerce_indexer_dtype(t.lookup(vals), cats) +def _recode_for_categories(codes, old_categories, new_categories): + """ + Convert a set of codes to a new set of categories + + Parameters + ---------- + codes : array + old_categories, new_categories : Index + + Returns + ------- + new_codes : array + + Examples + -------- + >>> old_cat = pd.Index(['b', 'a', 'c']) + >>> new_cat = pd.Index(['a', 'b']) + >>> codes = np.array([0, 1, 1, 2]) + >>> _recode_for_categories(codes, old_cat, new_cat) + array([ 1, 0, 0, -1]) + """ + from pandas.core.algorithms import take_1d + + if len(old_categories) == 0: + # All null anyway, so just retain the nulls + return codes + indexer = coerce_indexer_dtype(new_categories.get_indexer(old_categories), + new_categories) + new_codes = take_1d(indexer, codes.copy(), fill_value=-1) + return new_codes + + def _convert_to_list_like(list_like): if hasattr(list_like, "dtype"): return list_like diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 0ce45eea119ed..f6f956832eebe 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -314,6 +314,7 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False): Categories (3, object): [b, c, a] """ from pandas import Index, Categorical, CategoricalIndex, Series + 
from pandas.core.categorical import _recode_for_categories if len(to_union) == 0: raise ValueError('No Categoricals to union') @@ -359,14 +360,8 @@ def _maybe_unwrap(x): new_codes = [] for c in to_union: - if len(c.categories) > 0: - indexer = categories.get_indexer(c.categories) - - from pandas.core.algorithms import take_1d - new_codes.append(take_1d(indexer, c.codes, fill_value=-1)) - else: - # must be all NaN - new_codes.append(c.codes) + new_codes.append(_recode_for_categories(c.codes, c.categories, + categories)) new_codes = np.concatenate(new_codes) else: # ordered - to show a proper error message diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 7bbe220378993..8a5f6bf110be3 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -26,6 +26,7 @@ Interval, IntervalIndex) from pandas.compat import range, lrange, u, PY3, PYPY from pandas.core.config import option_context +from pandas.core.categorical import _recode_for_categories class TestCategorical(object): @@ -963,6 +964,67 @@ def test_rename_categories(self): with pytest.raises(ValueError): cat.rename_categories([1, 2]) + @pytest.mark.parametrize('codes, old, new, expected', [ + ([0, 1], ['a', 'b'], ['a', 'b'], [0, 1]), + ([0, 1], ['b', 'a'], ['b', 'a'], [0, 1]), + ([0, 1], ['a', 'b'], ['b', 'a'], [1, 0]), + ([0, 1], ['b', 'a'], ['a', 'b'], [1, 0]), + ([0, 1, 0, 1], ['a', 'b'], ['a', 'b', 'c'], [0, 1, 0, 1]), + ([0, 1, 2, 2], ['a', 'b', 'c'], ['a', 'b'], [0, 1, -1, -1]), + ([0, 1, -1], ['a', 'b', 'c'], ['a', 'b', 'c'], [0, 1, -1]), + ([0, 1, -1], ['a', 'b', 'c'], ['b'], [-1, 0, -1]), + ([0, 1, -1], ['a', 'b', 'c'], ['d'], [-1, -1, -1]), + ([0, 1, -1], ['a', 'b', 'c'], [], [-1, -1, -1]), + ([-1, -1], [], ['a', 'b'], [-1, -1]), + ([1, 0], ['b', 'a'], ['a', 'b'], [0, 1]), + ]) + def test_recode_to_categories(self, codes, old, new, expected): + codes = np.asanyarray(codes, dtype=np.int8) + expected = np.asanyarray(expected, dtype=np.int8) + old = Index(old) + new = Index(new) + result = _recode_for_categories(codes, old, new) + tm.assert_numpy_array_equal(result, expected) + + def test_recode_to_categories_large(self): + N = 1000 + codes = np.arange(N) + old = Index(codes) + expected = np.arange(N - 1, -1, -1, dtype=np.int16) + new = Index(expected) + result = _recode_for_categories(codes, old, new) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize('values, categories, new_categories', [ + # No NaNs, same cats, same order + (['a', 'b', 'a'], ['a', 'b'], ['a', 'b'],), + # No NaNs, same cats, different order + (['a', 'b', 'a'], ['a', 'b'], ['b', 'a'],), + # Same, unsorted + (['b', 'a', 'a'], ['a', 'b'], ['a', 'b'],), + # No NaNs, same cats, different order + (['b', 'a', 'a'], ['a', 'b'], ['b', 'a'],), + # NaNs + (['a', 'b', 'c'], ['a', 'b'], ['a', 'b']), + (['a', 'b', 'c'], ['a', 'b'], ['b', 'a']), + (['b', 'a', 'c'], ['a', 'b'], ['a', 'b']), + (['b', 'a', 'c'], ['a', 'b'], ['a', 'b']), + # Introduce NaNs + (['a', 'b', 'c'], ['a', 'b'], ['a']), + (['a', 'b', 'c'], ['a', 'b'], ['b']), + (['b', 'a', 'c'], ['a', 'b'], ['a']), + (['b', 'a', 'c'], ['a', 'b'], ['a']), + # No overlap + (['a', 'b', 'c'], ['a', 'b'], ['d', 'e']), + ]) + @pytest.mark.parametrize('ordered', [True, False]) + def test_set_categories_many(self, values, categories, new_categories, + ordered): + c = Categorical(values, categories) + expected = Categorical(values, new_categories, ordered) + result = c.set_categories(new_categories, ordered=ordered) + 
tm.assert_categorical_equal(result, expected) + def test_reorder_categories(self): cat = Categorical(["a", "b", "c", "a"], ordered=True) old = cat.copy() From 06a6e63c317e5291eb78081e2a21bc163ddaab6e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 14 Sep 2017 15:48:59 -0700 Subject: [PATCH 48/57] remove period_helper from non-period reqs (#17531) --- setup.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 434ca64473916..664478cc35845 100755 --- a/setup.py +++ b/setup.py @@ -461,7 +461,6 @@ def pxd(name): tseries_depends = ['pandas/_libs/src/datetime/np_datetime.h', 'pandas/_libs/src/datetime/np_datetime_strings.h', - 'pandas/_libs/src/period_helper.h', 'pandas/_libs/src/datetime.pxd'] @@ -478,11 +477,11 @@ def pxd(name): 'pxdfiles': ['_libs/src/util'], 'depends': tseries_depends, 'sources': ['pandas/_libs/src/datetime/np_datetime.c', - 'pandas/_libs/src/datetime/np_datetime_strings.c', - 'pandas/_libs/src/period_helper.c']}, + 'pandas/_libs/src/datetime/np_datetime_strings.c']}, '_libs.tslibs.timezones': {'pyxfile': '_libs/tslibs/timezones'}, '_libs.period': {'pyxfile': '_libs/period', - 'depends': tseries_depends, + 'depends': (tseries_depends + + ['pandas/_libs/src/period_helper.h']), 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c', 'pandas/_libs/src/period_helper.c']}, From ad70ed4ba921360169820dabd16e4475c527479f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 14 Sep 2017 15:52:53 -0700 Subject: [PATCH 49/57] Fix bug where offset.copy() != offset (#17452) --- pandas/tests/tseries/test_offsets.py | 5 + pandas/tseries/offsets.py | 180 ++++++++++++++++----------- 2 files changed, 115 insertions(+), 70 deletions(-) diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index 7e6e85f322fe0..cd2c29ffe3ac6 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -1955,6 +1955,11 @@ def _check_roundtrip(obj): _check_roundtrip(self._object(2)) _check_roundtrip(self._object() * 2) + def test_copy(self): + # GH 17452 + off = self._object(weekmask='Mon Wed Fri') + assert off == off.copy() + class TestCustomBusinessMonthEnd(CustomBusinessMonthBase, Base): _object = CBMonthEnd diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 7ccecaa84e6d6..d82a3a209af6b 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -11,6 +11,7 @@ from dateutil.relativedelta import relativedelta, weekday from dateutil.easter import easter from pandas._libs import tslib, Timestamp, OutOfBoundsDatetime, Timedelta +from pandas.util._decorators import cache_readonly import functools import operator @@ -573,9 +574,9 @@ def __setstate__(self, state): """Reconstruct an instance from a pickled state""" self.__dict__ = state if 'weekmask' in state and 'holidays' in state: - calendar, holidays = self.get_calendar(weekmask=self.weekmask, - holidays=self.holidays, - calendar=None) + calendar, holidays = _get_calendar(weekmask=self.weekmask, + holidays=self.holidays, + calendar=None) self.kwds['calendar'] = self.calendar = calendar self.kwds['holidays'] = self.holidays = holidays self.kwds['weekmask'] = state['weekmask'] @@ -978,9 +979,9 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.normalize = normalize self.kwds = kwds self.offset = kwds.get('offset', timedelta(0)) - calendar, holidays = self.get_calendar(weekmask=weekmask, - holidays=holidays, - calendar=calendar) + 
calendar, holidays = _get_calendar(weekmask=weekmask, + holidays=holidays, + calendar=calendar) # CustomBusinessDay instances are identified by the # following two attributes. See DateOffset._params() # holidays, weekmask @@ -989,36 +990,6 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.kwds['holidays'] = self.holidays = holidays self.kwds['calendar'] = self.calendar = calendar - def get_calendar(self, weekmask, holidays, calendar): - """Generate busdaycalendar""" - if isinstance(calendar, np.busdaycalendar): - if not holidays: - holidays = tuple(calendar.holidays) - elif not isinstance(holidays, tuple): - holidays = tuple(holidays) - else: - # trust that calendar.holidays and holidays are - # consistent - pass - return calendar, holidays - - if holidays is None: - holidays = [] - try: - holidays = holidays + calendar.holidays().tolist() - except AttributeError: - pass - holidays = [self._to_dt64(dt, dtype='datetime64[D]') for dt in - holidays] - holidays = tuple(sorted(holidays)) - - kwargs = {'weekmask': weekmask} - if holidays: - kwargs['holidays'] = holidays - - busdaycalendar = np.busdaycalendar(**kwargs) - return busdaycalendar, holidays - @apply_wraps def apply(self, other): if self.n <= 0: @@ -1050,25 +1021,10 @@ def apply(self, other): def apply_index(self, i): raise NotImplementedError - @staticmethod - def _to_dt64(dt, dtype='datetime64'): - # Currently - # > np.datetime64(dt.datetime(2013,5,1),dtype='datetime64[D]') - # numpy.datetime64('2013-05-01T02:00:00.000000+0200') - # Thus astype is needed to cast datetime to datetime64[D] - if getattr(dt, 'tzinfo', None) is not None: - i8 = tslib.pydt_to_i8(dt) - dt = tslib.tz_convert_single(i8, 'UTC', dt.tzinfo) - dt = Timestamp(dt) - dt = np.datetime64(dt) - if dt.dtype.name != dtype: - dt = dt.astype(dtype) - return dt - def onOffset(self, dt): if self.normalize and not _is_normalized(dt): return False - day64 = self._to_dt64(dt, 'datetime64[D]') + day64 = _to_dt64(dt, 'datetime64[D]') return np.is_busday(day64, busdaycal=self.calendar) @@ -1087,19 +1043,25 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.n = int(n) self.normalize = normalize super(CustomBusinessHour, self).__init__(**kwds) + + calendar, holidays = _get_calendar(weekmask=weekmask, + holidays=holidays, + calendar=calendar) + self.kwds['weekmask'] = self.weekmask = weekmask + self.kwds['holidays'] = self.holidays = holidays + self.kwds['calendar'] = self.calendar = calendar + + @cache_readonly + def next_bday(self): # used for moving to next businessday if self.n >= 0: nb_offset = 1 else: nb_offset = -1 - self.next_bday = CustomBusinessDay(n=nb_offset, - weekmask=weekmask, - holidays=holidays, - calendar=calendar) - - self.kwds['weekmask'] = self.next_bday.weekmask - self.kwds['holidays'] = self.next_bday.holidays - self.kwds['calendar'] = self.next_bday.calendar + return CustomBusinessDay(n=nb_offset, + weekmask=self.weekmask, + holidays=self.holidays, + calendar=self.calendar) class MonthOffset(SingleConstructorOffset): @@ -1471,11 +1433,25 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.normalize = normalize self.kwds = kwds self.offset = kwds.get('offset', timedelta(0)) - self.cbday = CustomBusinessDay(n=self.n, normalize=normalize, - weekmask=weekmask, holidays=holidays, - calendar=calendar, **kwds) - self.m_offset = MonthEnd(n=1, normalize=normalize, **kwds) - self.kwds['calendar'] = self.cbday.calendar # cache numpy calendar + + calendar, holidays = 
_get_calendar(weekmask=weekmask, + holidays=holidays, + calendar=calendar) + self.kwds['weekmask'] = self.weekmask = weekmask + self.kwds['holidays'] = self.holidays = holidays + self.kwds['calendar'] = self.calendar = calendar + + @cache_readonly + def cbday(self): + kwds = self.kwds + return CustomBusinessDay(n=self.n, normalize=self.normalize, **kwds) + + @cache_readonly + def m_offset(self): + kwds = self.kwds + kwds = {key: kwds[key] for key in kwds + if key not in ['calendar', 'weekmask', 'holidays']} + return MonthEnd(n=1, normalize=self.normalize, **kwds) @apply_wraps def apply(self, other): @@ -1531,11 +1507,27 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.normalize = normalize self.kwds = kwds self.offset = kwds.get('offset', timedelta(0)) - self.cbday = CustomBusinessDay(n=self.n, normalize=normalize, - weekmask=weekmask, holidays=holidays, - calendar=calendar, **kwds) - self.m_offset = MonthBegin(n=1, normalize=normalize, **kwds) - self.kwds['calendar'] = self.cbday.calendar # cache numpy calendar + + # _get_calendar does validation and possible transformation + # of calendar and holidays. + calendar, holidays = _get_calendar(weekmask=weekmask, + holidays=holidays, + calendar=calendar) + kwds['calendar'] = self.calendar = calendar + kwds['weekmask'] = self.weekmask = weekmask + kwds['holidays'] = self.holidays = holidays + + @cache_readonly + def cbday(self): + kwds = self.kwds + return CustomBusinessDay(n=self.n, normalize=self.normalize, **kwds) + + @cache_readonly + def m_offset(self): + kwds = self.kwds + kwds = {key: kwds[key] for key in kwds + if key not in ['calendar', 'weekmask', 'holidays']} + return MonthBegin(n=1, normalize=self.normalize, **kwds) @apply_wraps def apply(self, other): @@ -2861,6 +2853,54 @@ class Nano(Tick): CBMonthBegin = CustomBusinessMonthBegin CDay = CustomBusinessDay +# --------------------------------------------------------------------- +# Business Calendar helpers + + +def _get_calendar(weekmask, holidays, calendar): + """Generate busdaycalendar""" + if isinstance(calendar, np.busdaycalendar): + if not holidays: + holidays = tuple(calendar.holidays) + elif not isinstance(holidays, tuple): + holidays = tuple(holidays) + else: + # trust that calendar.holidays and holidays are + # consistent + pass + return calendar, holidays + + if holidays is None: + holidays = [] + try: + holidays = holidays + calendar.holidays().tolist() + except AttributeError: + pass + holidays = [_to_dt64(dt, dtype='datetime64[D]') for dt in holidays] + holidays = tuple(sorted(holidays)) + + kwargs = {'weekmask': weekmask} + if holidays: + kwargs['holidays'] = holidays + + busdaycalendar = np.busdaycalendar(**kwargs) + return busdaycalendar, holidays + + +def _to_dt64(dt, dtype='datetime64'): + # Currently + # > np.datetime64(dt.datetime(2013,5,1),dtype='datetime64[D]') + # numpy.datetime64('2013-05-01T02:00:00.000000+0200') + # Thus astype is needed to cast datetime to datetime64[D] + if getattr(dt, 'tzinfo', None) is not None: + i8 = tslib.pydt_to_i8(dt) + dt = tslib.tz_convert_single(i8, 'UTC', dt.tzinfo) + dt = Timestamp(dt) + dt = np.datetime64(dt) + if dt.dtype.name != dtype: + dt = dt.astype(dtype) + return dt + def _get_firstbday(wkday): """ From 94266d48e5f54287a877cf7a0e94ef740e3eda22 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 14 Sep 2017 18:29:39 -0500 Subject: [PATCH 50/57] PERF: Faster CategoricalIndex from categorical (#17513) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/category.py | 4 ++++ 
pandas/tests/indexes/test_category.py | 10 ++++++++++ 3 files changed, 15 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 6495ad3e7f6ad..52e056103cbdc 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -469,6 +469,7 @@ Performance Improvements - :attr:`Series.dt` no longer performs frequency inference, yielding a large speedup when accessing the attribute (:issue:`17210`) - Improved performance of :meth:`Categorical.set_categories` by not materializing the values (:issue:`17508`) - :attr:`Timestamp.microsecond` no longer re-computes on attribute access (:issue:`17331`) +- Improved performance of the :class:`CategoricalIndex` for data that is already categorical dtype (:issue:`17513`) .. _whatsnew_0210.bug_fixes: diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 71cd4790ac364..ef1dc4d971f37 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -130,6 +130,10 @@ def _create_categorical(self, data, categories=None, ordered=None): ------- Categorical """ + if (isinstance(data, (ABCSeries, type(self))) and + is_categorical_dtype(data)): + data = data.values + if not isinstance(data, ABCCategorical): ordered = False if ordered is None else ordered from pandas.core.categorical import Categorical diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index aac68ebd6abed..cf365465763fa 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -125,6 +125,16 @@ def test_construction_with_dtype(self): result = CategoricalIndex(idx, categories=idx, ordered=True) tm.assert_index_equal(result, expected, exact=True) + def test_create_categorical(self): + # https://github.com/pandas-dev/pandas/pull/17513 + # The public CI constructor doesn't hit this code path with + # instances of CategoricalIndex, but we still want to test the code + ci = CategoricalIndex(['a', 'b', 'c']) + # First ci is self, second ci is data. 
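+        # (Illustrative note, a sketch of what the assertion below relies
+        #  on: with the is_categorical_dtype fast path added above, passing
+        #  a CategoricalIndex as data should reduce to taking data.values
+        #  directly rather than rebuilding the Categorical from scratch.)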
+        result = CategoricalIndex._create_categorical(ci, ci)
+        expected = Categorical(['a', 'b', 'c'])
+        tm.assert_categorical_equal(result, expected)
+
     def test_disallow_set_ops(self):
 
         # GH 10039
 

From 9b21c5456eb4b2cdbc7f74569c4b8660ada951fe Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Thu, 14 Sep 2017 18:33:03 -0700
Subject: [PATCH 51/57] Remove unnecessary iNaT checks from _Period properties
 (#17421)

---
 asv_bench/benchmarks/period.py |  59 +++++++++++++++
 pandas/_libs/period.pyx        | 127 ++++++++++++++++++++------------
 2 files changed, 135 insertions(+), 51 deletions(-)

diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py
index 78d66295f28cc..df3c2bf3e4b46 100644
--- a/asv_bench/benchmarks/period.py
+++ b/asv_bench/benchmarks/period.py
@@ -78,6 +78,65 @@ def time_value_counts_pindex(self):
         self.i.value_counts()
 
 
+class Properties(object):
+    def setup(self):
+        self.per = Period('2017-09-06 08:28', freq='min')
+
+    def time_year(self):
+        self.per.year
+
+    def time_month(self):
+        self.per.month
+
+    def time_day(self):
+        self.per.day
+
+    def time_hour(self):
+        self.per.hour
+
+    def time_minute(self):
+        self.per.minute
+
+    def time_second(self):
+        self.per.second
+
+    def time_is_leap_year(self):
+        self.per.is_leap_year
+
+    def time_quarter(self):
+        self.per.quarter
+
+    def time_qyear(self):
+        self.per.qyear
+
+    def time_week(self):
+        self.per.week
+
+    def time_daysinmonth(self):
+        self.per.daysinmonth
+
+    def time_dayofweek(self):
+        self.per.dayofweek
+
+    def time_dayofyear(self):
+        self.per.dayofyear
+
+    def time_start_time(self):
+        self.per.start_time
+
+    def time_end_time(self):
+        self.per.end_time
+
+    def time_to_timestamp(self):
+        self.per.to_timestamp()
+
+    def time_now(self):
+        self.per.now()
+
+    def time_asfreq(self):
+        self.per.asfreq('A')
+
+
 class period_standard_indexing(object):
     goal_time = 0.2
 
diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx
index 9e473a7f362b4..babe0f7c6834d 100644
--- a/pandas/_libs/period.pyx
+++ b/pandas/_libs/period.pyx
@@ -107,6 +107,8 @@ cdef extern from "period_helper.h":
     int pday(int64_t ordinal, int freq) except INT32_MIN
     int pweekday(int64_t ordinal, int freq) except INT32_MIN
     int pday_of_week(int64_t ordinal, int freq) except INT32_MIN
+    # TODO: pday_of_week and pweekday are identical. Make one an alias instead
+    # of importing them separately.
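+    # (One possible shape for that cleanup, sketched here as an assumption:
+    #  keep only the pweekday declaration in this extern block and use it at
+    #  every pday_of_week call site, so the helper is declared once.)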
int pday_of_year(int64_t ordinal, int freq) except INT32_MIN int pweek(int64_t ordinal, int freq) except INT32_MIN int phour(int64_t ordinal, int freq) except INT32_MIN @@ -868,58 +870,81 @@ cdef class _Period(object): dt64 = period_ordinal_to_dt64(val.ordinal, base) return Timestamp(dt64, tz=tz) - cdef _field(self, alias): + @property + def year(self): + base, mult = get_freq_code(self.freq) + return pyear(self.ordinal, base) + + @property + def month(self): + base, mult = get_freq_code(self.freq) + return pmonth(self.ordinal, base) + + @property + def day(self): + base, mult = get_freq_code(self.freq) + return pday(self.ordinal, base) + + @property + def hour(self): + base, mult = get_freq_code(self.freq) + return phour(self.ordinal, base) + + @property + def minute(self): + base, mult = get_freq_code(self.freq) + return pminute(self.ordinal, base) + + @property + def second(self): + base, mult = get_freq_code(self.freq) + return psecond(self.ordinal, base) + + @property + def weekofyear(self): + base, mult = get_freq_code(self.freq) + return pweek(self.ordinal, base) + + @property + def week(self): + return self.weekofyear + + @property + def dayofweek(self): + base, mult = get_freq_code(self.freq) + return pweekday(self.ordinal, base) + + @property + def weekday(self): + return self.dayofweek + + @property + def dayofyear(self): + base, mult = get_freq_code(self.freq) + return pday_of_year(self.ordinal, base) + + @property + def quarter(self): base, mult = get_freq_code(self.freq) - return get_period_field(alias, self.ordinal, base) - - property year: - def __get__(self): - return self._field(0) - property month: - def __get__(self): - return self._field(3) - property day: - def __get__(self): - return self._field(4) - property hour: - def __get__(self): - return self._field(5) - property minute: - def __get__(self): - return self._field(6) - property second: - def __get__(self): - return self._field(7) - property weekofyear: - def __get__(self): - return self._field(8) - property week: - def __get__(self): - return self.weekofyear - property dayofweek: - def __get__(self): - return self._field(10) - property weekday: - def __get__(self): - return self.dayofweek - property dayofyear: - def __get__(self): - return self._field(9) - property quarter: - def __get__(self): - return self._field(2) - property qyear: - def __get__(self): - return self._field(1) - property days_in_month: - def __get__(self): - return self._field(11) - property daysinmonth: - def __get__(self): - return self.days_in_month - property is_leap_year: - def __get__(self): - return bool(is_leapyear(self._field(0))) + return pquarter(self.ordinal, base) + + @property + def qyear(self): + base, mult = get_freq_code(self.freq) + return pqyear(self.ordinal, base) + + @property + def days_in_month(self): + base, mult = get_freq_code(self.freq) + return pdays_in_month(self.ordinal, base) + + @property + def daysinmonth(self): + return self.days_in_month + + @property + def is_leap_year(self): + return bool(is_leapyear(self.year)) @classmethod def now(cls, freq=None): From 72c38883f09c6902863345de432d3c90a29140b3 Mon Sep 17 00:00:00 2001 From: jschendel Date: Fri, 15 Sep 2017 02:18:24 -0600 Subject: [PATCH 52/57] CLN: Fix Spelling Errors (#17535) --- doc/source/advanced.rst | 10 +++++----- doc/source/api.rst | 2 +- doc/source/basics.rst | 2 +- doc/source/computation.rst | 2 +- doc/source/groupby.rst | 4 ++-- doc/source/indexing.rst | 2 +- doc/source/io.rst | 2 +- doc/source/merging.rst | 6 +++--- doc/source/missing_data.rst 
| 2 +- doc/source/options.rst | 4 ++-- doc/source/reshaping.rst | 2 +- doc/source/sparse.rst | 2 +- doc/source/style.ipynb | 2 +- doc/source/timeseries.rst | 18 +++++++++--------- doc/source/visualization.rst | 2 +- pandas/core/algorithms.py | 2 +- pandas/core/indexes/interval.py | 2 +- pandas/core/reshape/concat.py | 2 +- pandas/core/reshape/merge.py | 6 +++--- pandas/core/reshape/tile.py | 2 +- pandas/io/formats/excel.py | 4 ++-- pandas/io/pytables.py | 12 ++++++------ pandas/io/stata.py | 4 ++-- pandas/plotting/_misc.py | 2 +- pandas/plotting/_tools.py | 2 +- pandas/tests/frame/test_convert_to.py | 4 ++-- pandas/tests/groupby/test_transform.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 2 +- pandas/tests/io/json/test_json_table_schema.py | 2 +- pandas/tests/io/parser/test_read_fwf.py | 2 +- pandas/tests/io/test_pytables.py | 8 ++++---- pandas/tests/plotting/test_datetimelike.py | 2 +- pandas/tests/series/test_dtypes.py | 2 +- pandas/tests/test_categorical.py | 2 +- pandas/tests/test_sorting.py | 2 +- pandas/tseries/util.py | 2 +- 36 files changed, 65 insertions(+), 65 deletions(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 3f145cf955664..3bda8c7eacb61 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -625,7 +625,7 @@ Index Types We have discussed ``MultiIndex`` in the previous sections pretty extensively. ``DatetimeIndex`` and ``PeriodIndex`` are shown :ref:`here `. ``TimedeltaIndex`` are :ref:`here `. -In the following sub-sections we will highlite some other index types. +In the following sub-sections we will highlight some other index types. .. _indexing.categoricalindex: @@ -645,7 +645,7 @@ and allows efficient indexing and storage of an index with a large number of dup df.dtypes df.B.cat.categories -Setting the index, will create create a ``CategoricalIndex`` +Setting the index, will create a ``CategoricalIndex`` .. ipython:: python @@ -681,7 +681,7 @@ Groupby operations on the index will preserve the index nature as well Reindexing operations, will return a resulting index based on the type of the passed indexer, meaning that passing a list will return a plain-old-``Index``; indexing with a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the categories -of the PASSED ``Categorical`` dtype. This allows one to arbitrarly index these even with +of the PASSED ``Categorical`` dtype. This allows one to arbitrarily index these even with values NOT in the categories, similarly to how you can reindex ANY pandas index. .. ipython :: python @@ -722,7 +722,7 @@ Int64Index and RangeIndex Prior to 0.18.0, the ``Int64Index`` would provide the default index for all ``NDFrame`` objects. ``RangeIndex`` is a sub-class of ``Int64Index`` added in version 0.18.0, now providing the default index for all ``NDFrame`` objects. -``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analagous to python `range types `__. +``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analogous to python `range types `__. .. _indexing.float64index: @@ -963,7 +963,7 @@ index can be somewhat complicated. For example, the following does not work: s.loc['c':'e'+1] A very common use case is to limit a time series to start and end at two -specific dates. To enable this, we made the design design to make label-based +specific dates. To enable this, we made the design to make label-based slicing include both endpoints: .. 
ipython:: python diff --git a/doc/source/api.rst b/doc/source/api.rst index 1541bbccefe21..4e02f7b11f466 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1291,7 +1291,7 @@ Index ----- **Many of these methods or variants thereof are available on the objects -that contain an index (Series/Dataframe) and those should most likely be +that contain an index (Series/DataFrame) and those should most likely be used before calling these methods directly.** .. autosummary:: diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 42c28df3a6030..0990d2bd15ee6 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -923,7 +923,7 @@ Passing a named function will yield that name for the row: Aggregating with a dict +++++++++++++++++++++++ -Passing a dictionary of column names to a scalar or a list of scalars, to ``DataFame.agg`` +Passing a dictionary of column names to a scalar or a list of scalars, to ``DataFrame.agg`` allows you to customize which functions are applied to which columns. Note that the results are not in any particular order, you can use an ``OrderedDict`` instead to guarantee ordering. diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 23699393958cf..14cfdbc364837 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -654,7 +654,7 @@ aggregation with, outputting a DataFrame: r['A'].agg([np.sum, np.mean, np.std]) -On a widowed DataFrame, you can pass a list of functions to apply to each +On a windowed DataFrame, you can pass a list of functions to apply to each column, which produces an aggregated result with a hierarchical index: .. ipython:: python diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index e1231b9a4a200..e9a7d8dd0a46e 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -561,7 +561,7 @@ must be either implemented on GroupBy or available via :ref:`dispatching .. note:: - If you pass a dict to ``aggregate``, the ordering of the output colums is + If you pass a dict to ``aggregate``, the ordering of the output columns is non-deterministic. If you want to be sure the output columns will be in a specific order, you can use an ``OrderedDict``. Compare the output of the following two commands: @@ -1211,7 +1211,7 @@ Groupby by Indexer to 'resample' data Resampling produces new hypothetical samples (resamples) from already existing observed data or from a model that generates data. These new samples are similar to the pre-existing samples. -In order to resample to work on indices that are non-datetimelike , the following procedure can be utilized. +In order to resample to work on indices that are non-datetimelike, the following procedure can be utilized. In the following examples, **df.index // 5** returns a binary array which is used to determine what gets selected for the groupby operation. diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 8474116c38082..edbc4e6d7fd22 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -714,7 +714,7 @@ Finally, one can also set a seed for ``sample``'s random number generator using Setting With Enlargement ------------------------ -The ``.loc/[]`` operations can perform enlargement when setting a non-existant key for that axis. +The ``.loc/[]`` operations can perform enlargement when setting a non-existent key for that axis. 
In the ``Series`` case this is effectively an appending operation diff --git a/doc/source/io.rst b/doc/source/io.rst index 8fbb23769492e..fcf7f6029197b 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3077,7 +3077,7 @@ Compressed pickle files .. versionadded:: 0.20.0 -:func:`read_pickle`, :meth:`DataFame.to_pickle` and :meth:`Series.to_pickle` can read +:func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` can read and write compressed pickle files. The compression types of ``gzip``, ``bz2``, ``xz`` are supported for reading and writing. `zip`` file supports read only and must contain only one data file to be read in. diff --git a/doc/source/merging.rst b/doc/source/merging.rst index a5ee1b1a9384c..72787ea97a782 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -1329,7 +1329,7 @@ By default we are taking the asof of the quotes. on='time', by='ticker') -We only asof within ``2ms`` betwen the quote time and the trade time. +We only asof within ``2ms`` between the quote time and the trade time. .. ipython:: python @@ -1338,8 +1338,8 @@ We only asof within ``2ms`` betwen the quote time and the trade time. by='ticker', tolerance=pd.Timedelta('2ms')) -We only asof within ``10ms`` betwen the quote time and the trade time and we exclude exact matches on time. -Note that though we exclude the exact matches (of the quotes), prior quotes DO propogate to that point +We only asof within ``10ms`` between the quote time and the trade time and we exclude exact matches on time. +Note that though we exclude the exact matches (of the quotes), prior quotes DO propagate to that point in time. .. ipython:: python diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 65b411ccd4af2..b33b5c304853a 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -320,7 +320,7 @@ Interpolation The ``limit_direction`` keyword argument was added. -Both Series and Dataframe objects have an ``interpolate`` method that, by default, +Both Series and DataFrame objects have an ``interpolate`` method that, by default, performs linear interpolation at missing datapoints. .. ipython:: python diff --git a/doc/source/options.rst b/doc/source/options.rst index 1592caf90546c..f042e4d3f5120 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -313,9 +313,9 @@ display.large_repr truncate For DataFrames exceeding max_ro display.latex.repr False Whether to produce a latex DataFrame representation for jupyter frontends that support it. -display.latex.escape True Escapes special caracters in Dataframes, when +display.latex.escape True Escapes special characters in DataFrames, when using the to_latex method. -display.latex.longtable False Specifies if the to_latex method of a Dataframe +display.latex.longtable False Specifies if the to_latex method of a DataFrame uses the longtable format. display.latex.multicolumn True Combines columns when using a MultiIndex display.latex.multicolumn_format 'l' Alignment of multicolumn labels diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index fab83222b313f..1209c4a8d6be8 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -156,7 +156,7 @@ the level numbers: stacked.unstack('second') Notice that the ``stack`` and ``unstack`` methods implicitly sort the index -levels involved. Hence a call to ``stack`` and then ``unstack``, or viceversa, +levels involved. 
Hence a call to ``stack`` and then ``unstack``, or vice versa, will result in a **sorted** copy of the original DataFrame or Series: .. ipython:: python diff --git a/doc/source/sparse.rst b/doc/source/sparse.rst index cf16cee501a3e..89efa7b4be3ee 100644 --- a/doc/source/sparse.rst +++ b/doc/source/sparse.rst @@ -132,7 +132,7 @@ dtype, ``fill_value`` default changes: s.to_sparse() You can change the dtype using ``.astype()``, the result is also sparse. Note that -``.astype()`` also affects to the ``fill_value`` to keep its dense represantation. +``.astype()`` also affects to the ``fill_value`` to keep its dense representation. .. ipython:: python diff --git a/doc/source/style.ipynb b/doc/source/style.ipynb index c250787785e14..1d6ce163cf977 100644 --- a/doc/source/style.ipynb +++ b/doc/source/style.ipynb @@ -169,7 +169,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Notice the similarity with the standard `df.applymap`, which operates on DataFrames elementwise. We want you to be able to resuse your existing knowledge of how to interact with DataFrames.\n", + "Notice the similarity with the standard `df.applymap`, which operates on DataFrames elementwise. We want you to be able to reuse your existing knowledge of how to interact with DataFrames.\n", "\n", "Notice also that our function returned a string containing the CSS attribute and value, separated by a colon just like in a `