From 658f1ab444e0f98701babca86874acdddadeb707 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 30 May 2017 17:03:56 -0500 Subject: [PATCH 1/2] BUG: Fixed pd.unique on array of tuples Closes #16519 --- doc/source/whatsnew/v0.20.2.txt | 3 +-- pandas/core/algorithms.py | 2 +- pandas/tests/test_algos.py | 16 ++++++++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 676da5c370041..823f3359d2f9b 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -44,8 +44,7 @@ Bug Fixes - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`) - Passing an invalid engine to :func:`read_csv` now raises an informative ``ValueError`` rather than ``UnboundLocalError``. (:issue:`16511`) - - +- Bug in :func:`pd.unique` on an array of tuples (:issue:`16519`) - Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on Categoricals (:issue:`16409`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 77d79c9585e57..9cfaf045e894d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -163,7 +163,7 @@ def _ensure_arraylike(values): ABCIndexClass, ABCSeries)): inferred = lib.infer_dtype(values) if inferred in ['mixed', 'string', 'unicode']: - values = np.asarray(values, dtype=object) + values = lib.list_to_object_array(values) else: values = np.asarray(values) return values diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 351e646cbb0b2..063dcea5c76d6 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -929,6 +929,22 @@ def test_unique_index(self): tm.assert_numpy_array_equal(case.duplicated(), np.array([False, False, False])) + @pytest.mark.parametrize('arr, unique', [ + ([(0, 0), (0, 1), (1, 0), (1, 1), (0, 0), (0, 1), (1, 0), (1, 1)], + [(0, 0), (0, 1), (1, 0), (1, 1)]), + ([('b', 'c'), ('a', 'b'), ('a', 'b'), ('b', 'c')], + [('b', 'c'), ('a', 'b')]), + ([('a', 1), ('b', 2), ('a', 3), ('a', 1)], + [('a', 1), ('b', 2), ('a', 3)]), + ]) + def test_unique_tuples(self, arr, unique): + # https://github.com/pandas-dev/pandas/issues/16519 + expected = np.empty(len(unique), dtype=object) + expected[:] = unique + + result = pd.unique(arr) + tm.assert_numpy_array_equal(result, expected) + class GroupVarTestMixin(object): From eb7c18f56d4c175b4248f35045343126750ddb26 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 30 May 2017 21:13:23 -0500 Subject: [PATCH 2/2] fixup! BUG: Fixed pd.unique on array of tuples --- doc/source/whatsnew/v0.20.2.txt | 2 +- pandas/core/algorithms.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index 823f3359d2f9b..2f0ee7a1a6e71 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -44,7 +44,7 @@ Bug Fixes - Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN values`` (:issue:`15593`) - Passing an invalid engine to :func:`read_csv` now raises an informative ``ValueError`` rather than ``UnboundLocalError``. (:issue:`16511`) -- Bug in :func:`pd.unique` on an array of tuples (:issue:`16519`) +- Bug in :func:`unique` on an array of tuples (:issue:`16519`) - Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on Categoricals (:issue:`16409`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 9cfaf045e894d..d74c5e66ea1a9 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -328,6 +328,11 @@ def unique(values): [b, a, c] Categories (3, object): [a < b < c] + An array of tuples + + >>> pd.unique([('a', 'b'), ('b', 'a'), ('a', 'c'), ('b', 'a')]) + array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object) + See Also -------- pandas.Index.unique