From 5349052667163ea2baa8897b050950c9215bf434 Mon Sep 17 00:00:00 2001
From: Erik Oveson <erikov@microsoft.com>
Date: Sat, 17 Nov 2018 20:17:23 -0800
Subject: [PATCH 01/12] TST: Add test case for GH14080 for overflow exception

---
 .../tests/scalar/timestamp/test_arithmetic.py | 27 ++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py
index 0f8ddc53734f6..6120d024cfd69 100644
--- a/pandas/tests/scalar/timestamp/test_arithmetic.py
+++ b/pandas/tests/scalar/timestamp/test_arithmetic.py
@@ -7,11 +7,12 @@
 import pandas.util.testing as tm
 from pandas.compat import long
 from pandas.tseries import offsets
+from pandas.tseries.frequencies import to_offset
 from pandas import Timestamp, Timedelta
 
 
 class TestTimestampArithmetic(object):
-    def test_overflow_offset(self):
+    def test_overflow_offset1(self):
         # xref https://github.com/statsmodels/statsmodels/issues/3374
         # ends up multiplying really large numbers which overflow
 
@@ -27,6 +28,30 @@ def test_overflow_offset(self):
         with pytest.raises(OverflowError):
             stamp - offset
 
+    def test_overflow_offset2(self):
+        # xref https://github.com/pandas-dev/pandas/issues/14080
+
+        stamp = Timestamp("2000/1/1")
+        offset_overflow = to_offset("D")*100**25
+        offset_no_overflow = to_offset("D")*100
+
+        with pytest.raises(OverflowError):
+            stamp + offset_overflow
+
+        with pytest.raises(OverflowError):
+            offset_overflow + stamp
+
+        with pytest.raises(OverflowError):
+            stamp - offset_overflow
+
+        expected = Timestamp("2000/04/10")
+        assert stamp + offset_no_overflow == expected
+
+        assert offset_no_overflow + stamp == expected
+
+        expected = Timestamp("1999/09/23")
+        assert stamp - offset_no_overflow == expected
+
     def test_delta_preserve_nanos(self):
         val = Timestamp(long(1337299200000000123))
         result = val + timedelta(1)

From a7df6eade76f0f190332696ae1d9871fd17389aa Mon Sep 17 00:00:00 2001
From: Erik Oveson <erikov@microsoft.com>
Date: Sat, 17 Nov 2018 20:39:37 -0800
Subject: [PATCH 02/12] TST: Add test case for GH14080 for overflow exception

---
 pandas/tests/scalar/timestamp/test_arithmetic.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py
index 6120d024cfd69..e8f4d93613f76 100644
--- a/pandas/tests/scalar/timestamp/test_arithmetic.py
+++ b/pandas/tests/scalar/timestamp/test_arithmetic.py
@@ -30,11 +30,13 @@ def test_overflow_offset1(self):
 
     def test_overflow_offset2(self):
         # xref https://github.com/pandas-dev/pandas/issues/14080
+        # used to crash, so check for proper overflow exception
 
         stamp = Timestamp("2000/1/1")
         offset_overflow = to_offset("D")*100**25
         offset_no_overflow = to_offset("D")*100
 
+        # overflow expected
         with pytest.raises(OverflowError):
             stamp + offset_overflow
 
@@ -44,6 +46,7 @@ def test_overflow_offset2(self):
         with pytest.raises(OverflowError):
             stamp - offset_overflow
 
+        # no overflow expected
         expected = Timestamp("2000/04/10")
         assert stamp + offset_no_overflow == expected
 

From f7be8f3828f9eddd204af577216e388d7cf5da12 Mon Sep 17 00:00:00 2001
From: Erik Oveson <erikov@microsoft.com>
Date: Sat, 17 Nov 2018 20:53:20 -0800
Subject: [PATCH 03/12] TST: Add test case for GH14080 for overflow exception

---
 pandas/tests/scalar/timestamp/test_arithmetic.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py
index e8f4d93613f76..a79b39b641064 100644
--- a/pandas/tests/scalar/timestamp/test_arithmetic.py
+++ b/pandas/tests/scalar/timestamp/test_arithmetic.py
@@ -33,8 +33,8 @@ def test_overflow_offset2(self):
         # used to crash, so check for proper overflow exception
 
         stamp = Timestamp("2000/1/1")
-        offset_overflow = to_offset("D")*100**25
-        offset_no_overflow = to_offset("D")*100
+        offset_overflow = to_offset("D") * 100 ** 25
+        offset_no_overflow = to_offset("D") * 100
 
         # overflow expected
         with pytest.raises(OverflowError):

From c727003fd4f9619dffe3c555de8cce3158497919 Mon Sep 17 00:00:00 2001
From: Erik Oveson <erikov@microsoft.com>
Date: Sun, 18 Nov 2018 08:45:33 -0800
Subject: [PATCH 04/12] TST: For GH14080, break up tests, test message

---
 .../tests/scalar/timestamp/test_arithmetic.py | 51 ++++++++++---------
 1 file changed, 28 insertions(+), 23 deletions(-)

diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py
index a79b39b641064..207bd103105ea 100644
--- a/pandas/tests/scalar/timestamp/test_arithmetic.py
+++ b/pandas/tests/scalar/timestamp/test_arithmetic.py
@@ -12,49 +12,54 @@
 
 
 class TestTimestampArithmetic(object):
-    def test_overflow_offset1(self):
+    def test_overflow_offset(self):
+        # no overflow expected
+
+        stamp = Timestamp("2000/1/1")
+        offset_no_overflow = to_offset("D") * 100
+
+        expected = Timestamp("2000/04/10")
+        assert stamp + offset_no_overflow == expected
+
+        assert offset_no_overflow + stamp == expected
+
+        expected = Timestamp("1999/09/23")
+        assert stamp - offset_no_overflow == expected
+
+    def test_overflow_offset_raises(self):
         # xref https://github.com/statsmodels/statsmodels/issues/3374
         # ends up multiplying really large numbers which overflow
 
         stamp = Timestamp('2017-01-13 00:00:00', freq='D')
-        offset = 20169940 * offsets.Day(1)
+        offset_overflow = 20169940 * offsets.Day(1)
+        msg = ("the add operation between "
+               r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} "
+               "will overflow")
 
-        with pytest.raises(OverflowError):
-            stamp + offset
+        with pytest.raises(OverflowError, match=msg):
+            stamp + offset_overflow
 
-        with pytest.raises(OverflowError):
-            offset + stamp
+        with pytest.raises(OverflowError, match=msg):
+            offset_overflow + stamp
 
-        with pytest.raises(OverflowError):
-            stamp - offset
+        with pytest.raises(OverflowError, match=msg):
+            stamp - offset_overflow
 
-    def test_overflow_offset2(self):
         # xref https://github.com/pandas-dev/pandas/issues/14080
         # used to crash, so check for proper overflow exception
 
         stamp = Timestamp("2000/1/1")
         offset_overflow = to_offset("D") * 100 ** 25
-        offset_no_overflow = to_offset("D") * 100
 
-        # overflow expected
-        with pytest.raises(OverflowError):
+        with pytest.raises(OverflowError, match=msg):
             stamp + offset_overflow
 
-        with pytest.raises(OverflowError):
+        with pytest.raises(OverflowError, match=msg):
             offset_overflow + stamp
 
-        with pytest.raises(OverflowError):
+        with pytest.raises(OverflowError, match=msg):
             stamp - offset_overflow
 
-        # no overflow expected
-        expected = Timestamp("2000/04/10")
-        assert stamp + offset_no_overflow == expected
-
-        assert offset_no_overflow + stamp == expected
-
-        expected = Timestamp("1999/09/23")
-        assert stamp - offset_no_overflow == expected
-
     def test_delta_preserve_nanos(self):
         val = Timestamp(long(1337299200000000123))
         result = val + timedelta(1)

From eecede19e4ac4afc97802f2496e7fe98343b8039 Mon Sep 17 00:00:00 2001
From: Erik Oveson <erikov@microsoft.com>
Date: Sun, 18 Nov 2018 22:33:28 -0800
Subject: [PATCH 05/12] TST: For GH4861, Period and datetime in multiindex

---
 pandas/tests/indexes/period/test_period.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py
index ddb3fe686534a..4a18ce425be66 100644
--- a/pandas/tests/indexes/period/test_period.py
+++ b/pandas/tests/indexes/period/test_period.py
@@ -1,3 +1,5 @@
+from datetime import datetime
+
 import numpy as np
 import pytest
 
@@ -570,3 +572,20 @@ def test_maybe_convert_timedelta():
     offset = offsets.BusinessDay()
     with pytest.raises(ValueError, match='freq'):
         pi._maybe_convert_timedelta(offset)
+
+
+def test_multiindex_period_datetime():
+    # GH4861, using datetime in period of multiindex raises exception
+
+    idx1 = Index(['a', 'a', 'a', 'b', 'b'])
+    idx2 = period_range('2012-01', periods=len(idx1), freq='M')
+    s = Series(np.random.randn(len(idx1)), [idx1, idx2])
+
+    # try Period as index
+    expected = s.iloc[0]
+    result = s.loc['a', Period('2012-01')]
+    assert result == expected
+
+    # try datetime as index
+    result = s.loc['a', datetime(2012, 1, 1)]
+    assert result == expected

From ae90f93757898814ded05f78549123e2e94ce834 Mon Sep 17 00:00:00 2001
From: Erik Oveson <erikov@microsoft.com>
Date: Mon, 19 Nov 2018 07:19:44 -0800
Subject: [PATCH 06/12] TST: GH4861 move changes to different test file

---
 pandas/tests/indexes/period/test_period.py | 19 -------------------
 pandas/tests/indexing/test_multiindex.py   | 22 +++++++++++++++++++++-
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py
index 4a18ce425be66..ddb3fe686534a 100644
--- a/pandas/tests/indexes/period/test_period.py
+++ b/pandas/tests/indexes/period/test_period.py
@@ -1,5 +1,3 @@
-from datetime import datetime
-
 import numpy as np
 import pytest
 
@@ -572,20 +570,3 @@ def test_maybe_convert_timedelta():
     offset = offsets.BusinessDay()
     with pytest.raises(ValueError, match='freq'):
         pi._maybe_convert_timedelta(offset)
-
-
-def test_multiindex_period_datetime():
-    # GH4861, using datetime in period of multiindex raises exception
-
-    idx1 = Index(['a', 'a', 'a', 'b', 'b'])
-    idx2 = period_range('2012-01', periods=len(idx1), freq='M')
-    s = Series(np.random.randn(len(idx1)), [idx1, idx2])
-
-    # try Period as index
-    expected = s.iloc[0]
-    result = s.loc['a', Period('2012-01')]
-    assert result == expected
-
-    # try datetime as index
-    result = s.loc['a', datetime(2012, 1, 1)]
-    assert result == expected
diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py
index ea17844a75033..17fb9952c8680 100644
--- a/pandas/tests/indexing/test_multiindex.py
+++ b/pandas/tests/indexing/test_multiindex.py
@@ -1,5 +1,7 @@
 from warnings import catch_warnings
 
+from datetime import datetime
+
 import numpy as np
 import pytest
 
@@ -7,7 +9,8 @@
 
 import pandas as pd
 from pandas import (
-    DataFrame, Index, MultiIndex, Panel, Series, Timestamp, date_range)
+    DataFrame, date_range, Index, MultiIndex, Panel, Period, period_range,
+    Series, Timestamp)
 from pandas.tests.indexing.common import _mklbl
 from pandas.util import testing as tm
 
@@ -1340,3 +1343,20 @@ def test_panel_setitem_with_multiindex(self):
         p5.iloc[0, :, 0] = [1, 2]
         expected = Panel(arr, **axes)
         tm.assert_panel_equal(p5, expected)
+
+
+def test_multiindex_period_datetime():
+    # GH4861, using datetime in period of multiindex raises exception
+
+    idx1 = Index(['a', 'a', 'a', 'b', 'b'])
+    idx2 = period_range('2012-01', periods=len(idx1), freq='M')
+    s = Series(np.random.randn(len(idx1)), [idx1, idx2])
+
+    # try Period as index
+    expected = s.iloc[0]
+    result = s.loc['a', Period('2012-01')]
+    assert result == expected
+
+    # try datetime as index
+    result = s.loc['a', datetime(2012, 1, 1)]
+    assert result == expected

From 4f16c8b9d9af83880144fc5d3046e20519ae554c Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Mon, 19 Nov 2018 20:58:40 -0500
Subject: [PATCH 07/12] fix sorting

---
 pandas/tests/indexing/test_multiindex.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py
index 17fb9952c8680..f4caf17b60d65 100644
--- a/pandas/tests/indexing/test_multiindex.py
+++ b/pandas/tests/indexing/test_multiindex.py
@@ -1,6 +1,5 @@
-from warnings import catch_warnings
-
 from datetime import datetime
+from warnings import catch_warnings
 
 import numpy as np
 import pytest
@@ -9,8 +8,8 @@
 
 import pandas as pd
 from pandas import (
-    DataFrame, date_range, Index, MultiIndex, Panel, Period, period_range,
-    Series, Timestamp)
+    DataFrame, Index, MultiIndex, Panel, Period, Series, Timestamp, date_range,
+    period_range)
 from pandas.tests.indexing.common import _mklbl
 from pandas.util import testing as tm
 

From 3e96734483868d3d92da0cd9f43540841280885b Mon Sep 17 00:00:00 2001
From: Erik Oveson <erikov@microsoft.com>
Date: Sat, 24 Nov 2018 14:46:31 -0800
Subject: [PATCH 08/12] PERF: For GH23814, return early in Categorical.__init__

---
 asv_bench/benchmarks/categoricals.py |  4 ++++
 pandas/core/arrays/categorical.py    | 10 ++++++++++
 2 files changed, 14 insertions(+)

diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
index 8a0fbc48755b5..6b86f5ab6b3f1 100644
--- a/asv_bench/benchmarks/categoricals.py
+++ b/asv_bench/benchmarks/categoricals.py
@@ -46,6 +46,7 @@ def setup(self):
         self.values_some_nan = list(np.tile(self.categories + [np.nan], N))
         self.values_all_nan = [np.nan] * len(self.values)
         self.values_all_int8 = np.ones(N, 'int8')
+        self.categorical = pd.Categorical(self.values, self.categories)
 
     def time_regular(self):
         pd.Categorical(self.values, self.categories)
@@ -68,6 +69,9 @@ def time_all_nan(self):
     def time_from_codes_all_int8(self):
         pd.Categorical.from_codes(self.values_all_int8, self.categories)
 
+    def time_existing_categorical(self):
+        pd.Categorical(self.categorical)
+
 
 class ValueCounts(object):
 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 6dc3a960dc817..011f97d067a72 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -314,6 +314,16 @@ class Categorical(ExtensionArray, PandasObject):
     def __init__(self, values, categories=None, ordered=None, dtype=None,
                  fastpath=False):
 
+        # GH23814, for perf, if no optional params used and values already an
+        # instance of Categorical, simply return the same dtype/codes
+        if categories is None and ordered is None and dtype is None:
+            if isinstance(values, (ABCSeries, ABCIndexClass)):
+                values = values._values
+            if isinstance(values, type(self)):
+                self._dtype = values.dtype
+                self._codes = values.codes.copy()
+                return
+
         # Ways of specifying the dtype (prioritized ordered)
         # 1. dtype is a CategoricalDtype
         #    a.) with known categories, use dtype.categories

From f6d10b8e8a4cbb93f8c3a499a64735c12898f76d Mon Sep 17 00:00:00 2001
From: Erik Oveson <erikov@microsoft.com>
Date: Sat, 24 Nov 2018 14:54:05 -0800
Subject: [PATCH 09/12] PERF: For GH23814, add whatsnew entry

---
 doc/source/whatsnew/v0.24.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index ba29a17057d02..a4b2c097d8d9c 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1150,7 +1150,7 @@ Performance Improvements
 - Improved performance of :func:`pd.concat` for `Series` objects (:issue:`23404`)
 - Improved performance of :meth:`DatetimeIndex.normalize` and :meth:`Timestamp.normalize` for timezone naive or UTC datetimes (:issue:`23634`)
 - Improved performance of :meth:`DatetimeIndex.tz_localize` and various ``DatetimeIndex`` attributes with dateutil UTC timezone (:issue:`23772`)
-
+- Improved performance of :meth:`Categorical.__init__` (:issue:`23814`)
 
 .. _whatsnew_0240.docs:
 

From 2173c896059b13034fb4be30db1c0845c30ea07b Mon Sep 17 00:00:00 2001
From: Erik Oveson <erikov@microsoft.com>
Date: Wed, 28 Nov 2018 21:20:17 -0800
Subject: [PATCH 10/12] Consolidating code to fit existing pattern better

---
 pandas/core/arrays/categorical.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 011f97d067a72..c084531c2f5d7 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -314,16 +314,6 @@ class Categorical(ExtensionArray, PandasObject):
     def __init__(self, values, categories=None, ordered=None, dtype=None,
                  fastpath=False):
 
-        # GH23814, for perf, if no optional params used and values already an
-        # instance of Categorical, simply return the same dtype/codes
-        if categories is None and ordered is None and dtype is None:
-            if isinstance(values, (ABCSeries, ABCIndexClass)):
-                values = values._values
-            if isinstance(values, type(self)):
-                self._dtype = values.dtype
-                self._codes = values.codes.copy()
-                return
-
         # Ways of specifying the dtype (prioritized ordered)
         # 1. dtype is a CategoricalDtype
         #    a.) with known categories, use dtype.categories
@@ -357,6 +347,16 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
             # the "ordered" and "categories" arguments
             dtype = values.dtype._from_categorical_dtype(values.dtype,
                                                          categories, ordered)
+
+            # GH23814, for perf, if no optional params used and values already
+            # an instance of Categorical, set values to codes like fastpath
+            if (isinstance(values, (ABCSeries, ABCIndexClass)) and
+               isinstance(values._values, type(self))):
+                values = values._values.codes.copy()
+                if categories is None:
+                    categories = dtype.categories
+                fastpath = True
+
         else:
             # If dtype=None and values is not categorical, create a new dtype
             dtype = CategoricalDtype(categories, ordered)

From 325be9205576f6aac0d31381703baa8e450c927c Mon Sep 17 00:00:00 2001
From: Erik Oveson <erikov@microsoft.com>
Date: Wed, 28 Nov 2018 21:45:57 -0800
Subject: [PATCH 11/12] Fix comment

---
 pandas/core/arrays/categorical.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index c084531c2f5d7..22ed0b9c5454e 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -348,8 +348,8 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
             dtype = values.dtype._from_categorical_dtype(values.dtype,
                                                          categories, ordered)
 
-            # GH23814, for perf, if no optional params used and values already
-            # an instance of Categorical, set values to codes like fastpath
+            # GH23814, for perf, if values._values already an instance of
+            # Categorical, set values to codes, and run fastpath
             if (isinstance(values, (ABCSeries, ABCIndexClass)) and
                isinstance(values._values, type(self))):
                 values = values._values.codes.copy()

From 9e270e9a9d27eec1455e462e36dcbe43c615d5fc Mon Sep 17 00:00:00 2001
From: Erik Oveson <erikov@microsoft.com>
Date: Thu, 29 Nov 2018 07:44:13 -0800
Subject: [PATCH 12/12] Add asv test that exercises code, update docs

---
 asv_bench/benchmarks/categoricals.py | 4 ++++
 doc/source/whatsnew/v0.24.0.rst      | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
index 6b86f5ab6b3f1..7318b40efc8fb 100644
--- a/asv_bench/benchmarks/categoricals.py
+++ b/asv_bench/benchmarks/categoricals.py
@@ -47,6 +47,7 @@ def setup(self):
         self.values_all_nan = [np.nan] * len(self.values)
         self.values_all_int8 = np.ones(N, 'int8')
         self.categorical = pd.Categorical(self.values, self.categories)
+        self.series = pd.Series(self.categorical)
 
     def time_regular(self):
         pd.Categorical(self.values, self.categories)
@@ -72,6 +73,9 @@ def time_from_codes_all_int8(self):
     def time_existing_categorical(self):
         pd.Categorical(self.categorical)
 
+    def time_existing_series(self):
+        pd.Categorical(self.series)
+
 
 class ValueCounts(object):
 
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index a4b2c097d8d9c..f8e85d7d315d4 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1150,7 +1150,7 @@ Performance Improvements
 - Improved performance of :func:`pd.concat` for `Series` objects (:issue:`23404`)
 - Improved performance of :meth:`DatetimeIndex.normalize` and :meth:`Timestamp.normalize` for timezone naive or UTC datetimes (:issue:`23634`)
 - Improved performance of :meth:`DatetimeIndex.tz_localize` and various ``DatetimeIndex`` attributes with dateutil UTC timezone (:issue:`23772`)
-- Improved performance of :meth:`Categorical.__init__` (:issue:`23814`)
+- Improved performance of :class:`Categorical` constructor for `Series` objects (:issue:`23814`)
 
 .. _whatsnew_0240.docs: