diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 9d1e0c7485092..90fac220396c8 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -222,7 +222,7 @@ Indexing Missing ^^^^^^^ - Bug in :meth:`Index.equals` raising ``TypeError`` when :class:`Index` consists of tuples that contain ``NA`` (:issue:`48446`) -- +- Bug in :meth:`Series.map` caused incorrect result when data has NaNs and defaultdict mapping was used (:issue:`48813`) MultiIndex ^^^^^^^^^^ diff --git a/pandas/core/base.py b/pandas/core/base.py index 927a3ed6a601c..a3ec4a27875f4 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -831,7 +831,9 @@ def _map_values(self, mapper, na_action=None): # If a dictionary subclass defines a default value method, # convert mapper to a lookup function (GH #15999). dict_with_default = mapper - mapper = lambda x: dict_with_default[x] + mapper = lambda x: dict_with_default[ + np.nan if isinstance(x, float) and np.isnan(x) else x + ] else: # Dictionary does not have a default. Thus it's safe to # convert to an Series for efficiency. diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index afe1c236858ab..5221c41ce35d5 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -598,6 +598,36 @@ def test_map_dict_na_key(): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("na_action", [None, "ignore"]) +def test_map_defaultdict_na_key(na_action): + # GH 48813 + s = Series([1, 2, np.nan]) + default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", np.nan: "c"}) + result = s.map(default_map, na_action=na_action) + expected = Series({0: "a", 1: "b", 2: "c" if na_action is None else np.nan}) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("na_action", [None, "ignore"]) +def test_map_defaultdict_missing_key(na_action): + # GH 48813 + s = Series([1, 2, np.nan]) + default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", 3: "c"}) + result = s.map(default_map, na_action=na_action) + expected = Series({0: "a", 1: "b", 2: "missing" if na_action is None else np.nan}) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("na_action", [None, "ignore"]) +def test_map_defaultdict_unmutated(na_action): + # GH 48813 + s = Series([1, 2, np.nan]) + default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", np.nan: "c"}) + expected_default_map = default_map.copy() + s.map(default_map, na_action=na_action) + assert default_map == expected_default_map + + @pytest.mark.parametrize("arg_func", [dict, Series]) def test_map_dict_ignore_na(arg_func): # GH#47527 @@ -613,7 +643,7 @@ def test_map_defaultdict_ignore_na(): mapping = defaultdict(int, {1: 10, np.nan: 42}) ser = Series([1, np.nan, 2]) result = ser.map(mapping) - expected = Series([10, 0, 0]) + expected = Series([10, 42, 0]) tm.assert_series_equal(result, expected)