diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7664688ffa4f4..4c0594c024774 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -593,6 +593,76 @@ result. On the other hand, this might have backward incompatibilities: e.g. compared to numpy arrays, ``Index`` objects are not mutable. To get the original ndarray, you can always convert explicitly using ``np.asarray(idx.hour)``. +.. _whatsnew_0200.api_breaking.unique: + +pd.unique will now be consistent with extension types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In prior versions, using ``Series.unique()`` and ``pd.unique(Series)`` on ``Categorical`` and tz-aware +datatypes would yield different return types. These are now made consistent. (:issue:`15903`) + +- Datetime tz-aware + + Previous behaviour: + + .. code-block:: ipython + + # Series + In [5]: pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')]).unique() + Out[5]: array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')], dtype=object) + + In [6]: pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')])) + Out[6]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') + + # Index + In [7]: pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')]).unique() + Out[7]: DatetimeIndex(['2016-01-01 00:00:00-05:00'], dtype='datetime64[ns, US/Eastern]', freq=None) + + In [8]: pd.unique([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')]) + Out[8]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') + + New Behavior: + + .. 
ipython:: python + + # Series, returns an array of Timestamp tz-aware + pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')]).unique() + pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')])) + + # Index, returns a DatetimeIndex + pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')]).unique() + pd.unique(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')])) + +- Categoricals + + Previous behaviour: + + .. code-block:: ipython + + In [1]: pd.Series(pd.Categorical(list('baabc'))).unique() + Out[1]: + [b, a, c] + Categories (3, object): [b, a, c] + + In [2]: pd.unique(pd.Series(pd.Categorical(list('baabc')))) + Out[2]: array(['b', 'a', 'c'], dtype=object) + + New Behavior: + + .. ipython:: python + + # returns a Categorical + pd.Series(pd.Categorical(list('baabc'))).unique() + pd.unique(pd.Series(pd.Categorical(list('baabc')))) + .. _whatsnew_0200.api_breaking.s3: S3 File Handling @@ -1148,6 +1218,7 @@ Conversion - Bug in ``DataFrame`` construction with nulls and datetimes in a list-like (:issue:`15869`) - Bug in ``DataFrame.fillna()`` with tz-aware datetimes (:issue:`15855`) - Bug in ``is_string_dtype``, ``is_timedelta64_ns_dtype``, and ``is_string_like_dtype`` in which an error was raised when ``None`` was passed in (:issue:`15941`) +- Bug in the return type of ``pd.unique`` on a ``Categorical``, which was returning an ndarray and not a ``Categorical`` (:issue:`15903`) Indexing ^^^^^^^^ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 9b88ea23483bd..654e38e43b6c0 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -267,11 +267,85 @@ def match(to_match, values, na_sentinel=-1): return result -def unique1d(values): +def unique(values): """ - Hash table-based unique + Hash table-based unique. 
Uniques are returned in order + of appearance. This does NOT sort. + + Significantly faster than numpy.unique. Includes NA values. + + Parameters + ---------- + values : 1d array-like + + Returns + ------- + unique values. + - If the input is an Index, the return is an Index + - If the input is a Categorical dtype, the return is a Categorical + - If the input is a Series/ndarray, the return will be an ndarray + + Examples + -------- + >>> pd.unique(pd.Series([2, 1, 3, 3])) + array([2, 1, 3]) + + >>> pd.unique(pd.Series([2] + [1] * 5)) + array([2, 1]) + + >>> pd.unique(pd.Series([pd.Timestamp('20160101'), + ... pd.Timestamp('20160101')])) + array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]') + + >>> pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + ... pd.Timestamp('20160101', tz='US/Eastern')])) + array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')], + dtype=object) + + >>> pd.unique(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + ... pd.Timestamp('20160101', tz='US/Eastern')])) + DatetimeIndex(['2016-01-01 00:00:00-05:00'], + ... dtype='datetime64[ns, US/Eastern]', freq=None) + + >>> pd.unique(list('baabc')) + array(['b', 'a', 'c'], dtype=object) + + An unordered Categorical will return categories in the + order of appearance. + + >>> pd.unique(pd.Series(pd.Categorical(list('baabc')))) + [b, a, c] + Categories (3, object): [b, a, c] + + >>> pd.unique(pd.Series(pd.Categorical(list('baabc'), + ... categories=list('abc')))) + [b, a, c] + Categories (3, object): [b, a, c] + + An ordered Categorical preserves the category ordering. + + >>> pd.unique(pd.Series(pd.Categorical(list('baabc'), + ... categories=list('abc'), + ... 
ordered=True))) + [b, a, c] + Categories (3, object): [a < b < c] + + See Also + -------- + pandas.Index.unique + pandas.Series.unique + """ + values = _ensure_arraylike(values) + + # categorical is a fast-path: dispatch to the object's own + # unique(), so a Categorical or categorical Series returns a + # Categorical and a CategoricalIndex returns a CategoricalIndex + # (no unwrapping via .values, which would turn a CI into a Categorical) + if is_categorical_dtype(values): + return values.unique() + original = values htable, _, values, dtype, ndtype = _get_hashtable_algo(values) @@ -279,10 +353,17 @@ def unique1d(values): uniques = table.unique(values) uniques = _reconstruct_data(uniques, dtype, original) + if isinstance(original, ABCSeries) and is_datetime64tz_dtype(dtype): + # we are special casing datetime64tz_dtype + # to return an object array of tz-aware Timestamps + + # TODO: it must return DatetimeArray with tz in pandas 2.0 + uniques = uniques.asobject.values + return uniques -unique = unique1d +unique1d = unique def isin(comps, values): @@ -651,7 +732,7 @@ def mode(values): if is_categorical_dtype(values): if isinstance(values, Series): - return Series(values.values.mode()) + return Series(values.values.mode(), name=values.name) return values.mode() values, dtype, ndtype = _ensure_data(values) diff --git a/pandas/core/base.py b/pandas/core/base.py index 3401c7c59cb56..56bdeee6982d5 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -855,13 +855,24 @@ def value_counts(self, normalize=False, sort=True, ascending=False, _shared_docs['unique'] = ( """ - Return %(unique)s of unique values in the object. - Significantly faster than numpy.unique. Includes NA values. - The order of the original is preserved. + Return unique values in the object. Uniques are returned in order + of appearance, this does NOT sort. Hash table-based unique. + + Parameters + ---------- + values : 1d array-like Returns ------- - uniques : %(unique)s + unique values. 
+ - If the input is an Index, the return is an Index + - If the input is a Categorical dtype, the return is a Categorical + - If the input is a Series/ndarray, the return will be an ndarray + + See Also + -------- + pandas.unique + pandas.Categorical.unique """) @Appender(_shared_docs['unique'] % _indexops_doc_kwargs) @@ -873,6 +884,7 @@ def unique(self): else: from pandas.core.algorithms import unique1d result = unique1d(values) + return result def nunique(self, dropna=True): diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 0fcf8664e755d..e3d6792604c4c 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1895,6 +1895,33 @@ def unique(self): Returns ------- unique values : ``Categorical`` + + Examples + -------- + An unordered Categorical will return categories in the + order of appearance. + + >>> pd.Categorical(list('baabc')).unique() + [b, a, c] + Categories (3, object): [b, a, c] + + >>> pd.Categorical(list('baabc'), categories=list('abc')).unique() + [b, a, c] + Categories (3, object): [b, a, c] + + An ordered Categorical preserves the category ordering. + + >>> pd.Categorical(list('baabc'), + ... categories=list('abc'), + ... 
ordered=True).unique() + [b, a, c] + Categories (3, object): [a < b < c] + + See Also + -------- + pandas.unique + pandas.CategoricalIndex.unique + """ # unlike np.unique, unique1d does not sort diff --git a/pandas/core/series.py b/pandas/core/series.py index 760abc20351cf..5ee3ca73742ae 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1204,10 +1204,14 @@ def mode(self): @Appender(base._shared_docs['unique'] % _shared_doc_kwargs) def unique(self): result = super(Series, self).unique() + if is_datetime64tz_dtype(self.dtype): - # to return array of Timestamp with tz - # ToDo: it must return DatetimeArray with tz in pandas 2.0 - return result.asobject.values + # we are special casing datetime64tz_dtype + # to return an object array of tz-aware Timestamps + + # TODO: it must return DatetimeArray with tz in pandas 2.0 + result = result.asobject.values + return result @Appender(base._shared_docs['drop_duplicates'] % _shared_doc_kwargs) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index d893183dae0ed..d9f81968c684d 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -6,7 +6,8 @@ from numpy import nan from datetime import datetime from itertools import permutations -from pandas import Series, Categorical, CategoricalIndex, Index +from pandas import (Series, Categorical, CategoricalIndex, Index, + Timestamp, DatetimeIndex) import pandas as pd from pandas import compat @@ -34,7 +35,7 @@ def test_ints(self): expected = Series(np.array([0, 2, 1, 1, 0, 2, np.nan, 0])) tm.assert_series_equal(result, expected) - s = pd.Series(np.arange(5), dtype=np.float32) + s = Series(np.arange(5), dtype=np.float32) result = algos.match(s, [2, 4]) expected = np.array([-1, -1, 0, -1, 1], dtype=np.int64) self.assert_numpy_array_equal(result, expected) @@ -204,20 +205,20 @@ def test_mixed(self): def test_datelike(self): # M8 - v1 = pd.Timestamp('20130101 09:00:00.00004') - v2 = pd.Timestamp('20130101') + v1 = Timestamp('20130101 
09:00:00.00004') + v2 = Timestamp('20130101') x = Series([v1, v1, v1, v2, v2, v1]) labels, uniques = algos.factorize(x) exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp) self.assert_numpy_array_equal(labels, exp) - exp = pd.DatetimeIndex([v1, v2]) + exp = DatetimeIndex([v1, v2]) self.assert_index_equal(uniques, exp) labels, uniques = algos.factorize(x, sort=True) exp = np.array([1, 1, 1, 0, 0, 1], dtype=np.intp) self.assert_numpy_array_equal(labels, exp) - exp = pd.DatetimeIndex([v2, v1]) + exp = DatetimeIndex([v2, v1]) self.assert_index_equal(uniques, exp) # period @@ -350,7 +351,7 @@ def test_datetime64_dtype_array_returned(self): tm.assert_numpy_array_equal(result, expected) self.assertEqual(result.dtype, expected.dtype) - s = pd.Series(dt_index) + s = Series(dt_index) result = algos.unique(s) tm.assert_numpy_array_equal(result, expected) self.assertEqual(result.dtype, expected.dtype) @@ -369,7 +370,7 @@ def test_timedelta64_dtype_array_returned(self): tm.assert_numpy_array_equal(result, expected) self.assertEqual(result.dtype, expected.dtype) - s = pd.Series(td_index) + s = Series(td_index) result = algos.unique(s) tm.assert_numpy_array_equal(result, expected) self.assertEqual(result.dtype, expected.dtype) @@ -380,10 +381,119 @@ def test_timedelta64_dtype_array_returned(self): self.assertEqual(result.dtype, expected.dtype) def test_uint64_overflow(self): - s = pd.Series([1, 2, 2**63, 2**63], dtype=np.uint64) + s = Series([1, 2, 2**63, 2**63], dtype=np.uint64) exp = np.array([1, 2, 2**63], dtype=np.uint64) tm.assert_numpy_array_equal(algos.unique(s), exp) + def test_categorical(self): + + # we are expecting to return in the order + # of appearance + expected = pd.Categorical(list('bac'), + categories=list('bac')) + + # we are expecting to return in the order + # of the categories + expected_o = pd.Categorical(list('bac'), + categories=list('abc'), + ordered=True) + + # GH 15939 + c = pd.Categorical(list('baabc')) + result = c.unique() + 
tm.assert_categorical_equal(result, expected) + + result = algos.unique(c) + tm.assert_categorical_equal(result, expected) + + c = pd.Categorical(list('baabc'), ordered=True) + result = c.unique() + tm.assert_categorical_equal(result, expected_o) + + result = algos.unique(c) + tm.assert_categorical_equal(result, expected_o) + + # Series of categorical dtype + s = Series(pd.Categorical(list('baabc')), name='foo') + result = s.unique() + tm.assert_categorical_equal(result, expected) + + result = pd.unique(s) + tm.assert_categorical_equal(result, expected) + + # CI -> return CI + ci = pd.CategoricalIndex(pd.Categorical(list('baabc'), + categories=list('bac'))) + expected = pd.CategoricalIndex(expected) + result = ci.unique() + tm.assert_index_equal(result, expected) + + result = pd.unique(ci) + tm.assert_index_equal(result, expected) + + def test_datetime64tz_aware(self): + # GH 15939 + + result = Series( + pd.Index([Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')])).unique() + expected = np.array([Timestamp('2016-01-01 00:00:00-0500', + tz='US/Eastern')], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = pd.Index([Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')]).unique() + expected = DatetimeIndex(['2016-01-01 00:00:00'], + dtype='datetime64[ns, US/Eastern]', freq=None) + tm.assert_index_equal(result, expected) + + result = pd.unique( + Series(pd.Index([Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')]))) + expected = np.array([Timestamp('2016-01-01 00:00:00-0500', + tz='US/Eastern')], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = pd.unique(pd.Index([Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')])) + expected = DatetimeIndex(['2016-01-01 00:00:00'], + dtype='datetime64[ns, US/Eastern]', freq=None) + tm.assert_index_equal(result, expected) + + def test_order_of_appearance(self): + # 
9346 + # light testing of guarantee of order of appearance + # these also are the doc-examples + result = pd.unique(Series([2, 1, 3, 3])) + tm.assert_numpy_array_equal(result, + np.array([2, 1, 3], dtype='int64')) + + result = pd.unique(Series([2] + [1] * 5)) + tm.assert_numpy_array_equal(result, + np.array([2, 1], dtype='int64')) + + result = pd.unique(Series([Timestamp('20160101'), + Timestamp('20160101')])) + expected = np.array(['2016-01-01T00:00:00.000000000'], + dtype='datetime64[ns]') + tm.assert_numpy_array_equal(result, expected) + + result = pd.unique(pd.Index( + [Timestamp('20160101', tz='US/Eastern'), + Timestamp('20160101', tz='US/Eastern')])) + expected = DatetimeIndex(['2016-01-01 00:00:00'], + dtype='datetime64[ns, US/Eastern]', + freq=None) + tm.assert_index_equal(result, expected) + + result = pd.unique(list('aabc')) + expected = np.array(['a', 'b', 'c'], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = pd.unique(Series(pd.Categorical(list('aabc')))) + expected = pd.Categorical(list('abc')) + tm.assert_categorical_equal(result, expected) + class TestIsin(tm.TestCase): @@ -403,15 +513,15 @@ def test_basic(self): expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series([1, 2]), [1]) + result = algos.isin(Series([1, 2]), [1]) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series([1, 2]), pd.Series([1])) + result = algos.isin(Series([1, 2]), Series([1])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series([1, 2]), set([1])) + result = algos.isin(Series([1, 2]), set([1])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) @@ -419,11 +529,11 @@ def test_basic(self): expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series(['a', 'b']), pd.Series(['a'])) + result = 
algos.isin(Series(['a', 'b']), Series(['a'])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series(['a', 'b']), set(['a'])) + result = algos.isin(Series(['a', 'b']), set(['a'])) expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) @@ -520,33 +630,33 @@ def test_value_counts_nat(self): self.assertEqual(len(vc), 1) self.assertEqual(len(vc_with_na), 2) - exp_dt = pd.Series({pd.Timestamp('2014-01-01 00:00:00'): 1}) + exp_dt = Series({Timestamp('2014-01-01 00:00:00'): 1}) tm.assert_series_equal(algos.value_counts(dt), exp_dt) # TODO same for (timedelta) def test_value_counts_datetime_outofbounds(self): # GH 13663 - s = pd.Series([datetime(3000, 1, 1), datetime(5000, 1, 1), - datetime(5000, 1, 1), datetime(6000, 1, 1), - datetime(3000, 1, 1), datetime(3000, 1, 1)]) + s = Series([datetime(3000, 1, 1), datetime(5000, 1, 1), + datetime(5000, 1, 1), datetime(6000, 1, 1), + datetime(3000, 1, 1), datetime(3000, 1, 1)]) res = s.value_counts() exp_index = pd.Index([datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1)], dtype=object) - exp = pd.Series([3, 2, 1], index=exp_index) + exp = Series([3, 2, 1], index=exp_index) tm.assert_series_equal(res, exp) # GH 12424 - res = pd.to_datetime(pd.Series(['2362-01-01', np.nan]), + res = pd.to_datetime(Series(['2362-01-01', np.nan]), errors='ignore') - exp = pd.Series(['2362-01-01', np.nan], dtype=object) + exp = Series(['2362-01-01', np.nan], dtype=object) tm.assert_series_equal(res, exp) def test_categorical(self): s = Series(pd.Categorical(list('aaabbc'))) result = s.value_counts() - expected = pd.Series([3, 2, 1], - index=pd.CategoricalIndex(['a', 'b', 'c'])) + expected = Series([3, 2, 1], + index=pd.CategoricalIndex(['a', 'b', 'c'])) tm.assert_series_equal(result, expected, check_index_type=True) # preserve order? 
@@ -559,11 +669,11 @@ def test_categorical_nans(self): s = Series(pd.Categorical(list('aaaaabbbcc'))) # 4,3,2,1 (nan) s.iloc[1] = np.nan result = s.value_counts() - expected = pd.Series([4, 3, 2], index=pd.CategoricalIndex( + expected = Series([4, 3, 2], index=pd.CategoricalIndex( ['a', 'b', 'c'], categories=['a', 'b', 'c'])) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) - expected = pd.Series([ + expected = Series([ 4, 3, 2, 1 ], index=pd.CategoricalIndex(['a', 'b', 'c', np.nan])) tm.assert_series_equal(result, expected, check_index_type=True) @@ -573,12 +683,12 @@ def test_categorical_nans(self): list('aaaaabbbcc'), ordered=True, categories=['b', 'a', 'c'])) s.iloc[1] = np.nan result = s.value_counts() - expected = pd.Series([4, 3, 2], index=pd.CategoricalIndex( + expected = Series([4, 3, 2], index=pd.CategoricalIndex( ['a', 'b', 'c'], categories=['b', 'a', 'c'], ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) - expected = pd.Series([4, 3, 2, 1], index=pd.CategoricalIndex( + expected = Series([4, 3, 2, 1], index=pd.CategoricalIndex( ['a', 'b', 'c', np.nan], categories=['b', 'a', 'c'], ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True) @@ -595,33 +705,33 @@ def test_dropna(self): # https://github.com/pandas-dev/pandas/issues/9443#issuecomment-73719328 tm.assert_series_equal( - pd.Series([True, True, False]).value_counts(dropna=True), - pd.Series([2, 1], index=[True, False])) + Series([True, True, False]).value_counts(dropna=True), + Series([2, 1], index=[True, False])) tm.assert_series_equal( - pd.Series([True, True, False]).value_counts(dropna=False), - pd.Series([2, 1], index=[True, False])) + Series([True, True, False]).value_counts(dropna=False), + Series([2, 1], index=[True, False])) tm.assert_series_equal( - pd.Series([True, True, False, None]).value_counts(dropna=True), - pd.Series([2, 1], index=[True, 
False])) + Series([True, True, False, None]).value_counts(dropna=True), + Series([2, 1], index=[True, False])) tm.assert_series_equal( - pd.Series([True, True, False, None]).value_counts(dropna=False), - pd.Series([2, 1, 1], index=[True, False, np.nan])) + Series([True, True, False, None]).value_counts(dropna=False), + Series([2, 1, 1], index=[True, False, np.nan])) tm.assert_series_equal( - pd.Series([10.3, 5., 5.]).value_counts(dropna=True), - pd.Series([2, 1], index=[5., 10.3])) + Series([10.3, 5., 5.]).value_counts(dropna=True), + Series([2, 1], index=[5., 10.3])) tm.assert_series_equal( - pd.Series([10.3, 5., 5.]).value_counts(dropna=False), - pd.Series([2, 1], index=[5., 10.3])) + Series([10.3, 5., 5.]).value_counts(dropna=False), + Series([2, 1], index=[5., 10.3])) tm.assert_series_equal( - pd.Series([10.3, 5., 5., None]).value_counts(dropna=True), - pd.Series([2, 1], index=[5., 10.3])) + Series([10.3, 5., 5., None]).value_counts(dropna=True), + Series([2, 1], index=[5., 10.3])) # 32-bit linux has a different ordering if not compat.is_platform_32bit(): - result = pd.Series([10.3, 5., 5., None]).value_counts(dropna=False) - expected = pd.Series([2, 1, 1], index=[5., 10.3, np.nan]) + result = Series([10.3, 5., 5., None]).value_counts(dropna=False) + expected = Series([2, 1, 1], index=[5., 10.3, np.nan]) tm.assert_series_equal(result, expected) def test_value_counts_normalized(self): @@ -736,15 +846,15 @@ def test_numeric_object_likes(self): tm.assert_numpy_array_equal(res_false, exp_false) # series - for s in [pd.Series(case), pd.Series(case, dtype='category')]: + for s in [Series(case), Series(case, dtype='category')]: res_first = s.duplicated(keep='first') - tm.assert_series_equal(res_first, pd.Series(exp_first)) + tm.assert_series_equal(res_first, Series(exp_first)) res_last = s.duplicated(keep='last') - tm.assert_series_equal(res_last, pd.Series(exp_last)) + tm.assert_series_equal(res_last, Series(exp_last)) res_false = s.duplicated(keep=False) - 
tm.assert_series_equal(res_false, pd.Series(exp_false)) + tm.assert_series_equal(res_false, Series(exp_false)) def test_datetime_likes(self): @@ -753,8 +863,8 @@ def test_datetime_likes(self): td = ['1 days', '2 days', '1 days', 'NaT', '3 days', '2 days', '4 days', '1 days', 'NaT', '6 days'] - cases = [np.array([pd.Timestamp(d) for d in dt]), - np.array([pd.Timestamp(d, tz='US/Eastern') for d in dt]), + cases = [np.array([Timestamp(d) for d in dt]), + np.array([Timestamp(d, tz='US/Eastern') for d in dt]), np.array([pd.Period(d, freq='D') for d in dt]), np.array([np.datetime64(d) for d in dt]), np.array([pd.Timedelta(d) for d in td])] @@ -788,16 +898,16 @@ def test_datetime_likes(self): tm.assert_numpy_array_equal(res_false, exp_false) # series - for s in [pd.Series(case), pd.Series(case, dtype='category'), - pd.Series(case, dtype=object)]: + for s in [Series(case), Series(case, dtype='category'), + Series(case, dtype=object)]: res_first = s.duplicated(keep='first') - tm.assert_series_equal(res_first, pd.Series(exp_first)) + tm.assert_series_equal(res_first, Series(exp_first)) res_last = s.duplicated(keep='last') - tm.assert_series_equal(res_last, pd.Series(exp_last)) + tm.assert_series_equal(res_last, Series(exp_last)) res_false = s.duplicated(keep=False) - tm.assert_series_equal(res_false, pd.Series(exp_false)) + tm.assert_series_equal(res_false, Series(exp_false)) def test_unique_index(self): cases = [pd.Index([1, 2, 3]), pd.RangeIndex(0, 3)] @@ -939,7 +1049,7 @@ def test_lookup_overflow(self): np.arange(len(xs), dtype=np.int64)) def test_get_unique(self): - s = pd.Series([1, 2, 2**63, 2**63], dtype=np.uint64) + s = Series([1, 2, 2**63, 2**63], dtype=np.uint64) exp = np.array([1, 2, 2**63], dtype=np.uint64) self.assert_numpy_array_equal(s.unique(), exp)