Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/reference/testing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ Exceptions and warnings
errors.AccessorRegistrationWarning
errors.AttributeConflictWarning
errors.CategoricalConversionWarning
errors.ChainedAssignmentError
errors.ClosedFileError
errors.CSSWarning
errors.DatabaseError
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import functools
from io import StringIO
import itertools
import sys
from textwrap import dedent
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -95,7 +96,11 @@
function as nv,
np_percentile_argname,
)
from pandas.errors import InvalidIndexError
from pandas.errors import (
ChainedAssignmentError,
InvalidIndexError,
_chained_assignment_msg,
)
from pandas.util._decorators import (
Appender,
Substitution,
Expand Down Expand Up @@ -3838,6 +3843,10 @@ def isetitem(self, loc, value) -> None:
self._iset_item_mgr(loc, arraylike, inplace=False)

def __setitem__(self, key, value):
if using_copy_on_write():
if sys.getrefcount(self) <= 3:
raise ChainedAssignmentError(_chained_assignment_msg)

key = com.apply_if_callable(key, self)

# see if we can slice the rows
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from contextlib import suppress
import sys
from typing import (
TYPE_CHECKING,
Hashable,
Expand All @@ -12,6 +13,8 @@

import numpy as np

from pandas._config import using_copy_on_write

from pandas._libs.indexing import NDFrameIndexerBase
from pandas._libs.lib import item_from_zerodim
from pandas._typing import (
Expand All @@ -20,9 +23,11 @@
)
from pandas.errors import (
AbstractMethodError,
ChainedAssignmentError,
IndexingError,
InvalidIndexError,
LossySetitemError,
_chained_assignment_msg,
)
from pandas.util._decorators import doc

Expand Down Expand Up @@ -830,6 +835,11 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:

@final
def __setitem__(self, key, value) -> None:
if using_copy_on_write():
# print("_LocationIndexer.__setitem__ refcount: ",sys.getrefcount(self.obj))
if sys.getrefcount(self.obj) <= 2:
raise ChainedAssignmentError(_chained_assignment_msg)

check_dict_or_set_indexers(key)
if isinstance(key, tuple):
key = tuple(list(x) if is_iterator(x) else x for x in key)
Expand Down
12 changes: 11 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
from __future__ import annotations

import sys
from textwrap import dedent
from typing import (
IO,
Expand Down Expand Up @@ -68,7 +69,11 @@
npt,
)
from pandas.compat.numpy import function as nv
from pandas.errors import InvalidIndexError
from pandas.errors import (
ChainedAssignmentError,
InvalidIndexError,
_chained_assignment_msg,
)
from pandas.util._decorators import (
Appender,
Substitution,
Expand Down Expand Up @@ -1070,6 +1075,11 @@ def _get_value(self, label, takeable: bool = False):
return self.iloc[loc]

def __setitem__(self, key, value) -> None:
if using_copy_on_write():
# print("Series.__getitem__ refcount: ", sys.getrefcount(self))
if sys.getrefcount(self) <= 3:
raise ChainedAssignmentError(_chained_assignment_msg)

check_dict_or_set_indexers(key)
key = com.apply_if_callable(key, self)
cacher_needs_updating = self._check_is_chained_assignment_possible()
Expand Down
36 changes: 36 additions & 0 deletions pandas/errors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,42 @@ class SettingWithCopyWarning(Warning):
"""


class ChainedAssignmentError(ValueError):
"""
Exception raised when trying to set using chained assignment.

When the ``mode.copy_on_write`` option is enabled, chained assignment can
never work. In such a situation, we are always setting into a temporary
object that is the result of indexing (getitem), which under Copy-on-Write
always behaves as a copy. Thus, assigning through a chain can never
update the original Series or DataFrame.

For more information on view vs. copy,
see :ref:`the user guide<indexing.view_versus_copy>`.

Examples
--------
>>> pd.options.mode.copy_on_write = True
>>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A'])
>>> df["A"][0:3] = 10 # doctest: +SKIP
... # ChainedAssignmentError: ...
"""


_chained_assignment_msg = (
"A value is trying to be set on a copy of a DataFrame or Series "
"through chained assignment.\n"
"When using the Copy-on-Write mode, such chained assignment never works "
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

never updates the original....

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's on the next line?

Copy link
Member

@phofl phofl Jan 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah sorry, should have been clearer:

I would reword to:

When using the Copy-on-Write mode, such chained assignment never updates the original DataFrame or Series, ...

This sounds better to me

"to update the original DataFrame or Series, because the intermediate "
"object on which we are setting values always behaves as a copy.\n\n"
"Try using '.loc[row_indexer, col_indexer] = value' instead, to perform "
"the assignment in a single step.\n\n"
"See the caveats in the documentation: "
"https://pandas.pydata.org/pandas-docs/stable/user_guide/"
"indexing.html#returning-a-view-versus-a-copy"
)


class NumExprClobberingError(NameError):
"""
Exception raised when trying to use a built-in numexpr name as a variable name.
Expand Down
10 changes: 7 additions & 3 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
import pytest

from pandas.errors import ChainedAssignmentError
import pandas.util._test_decorators as td

from pandas.core.dtypes.base import _registry as ea_registry
Expand Down Expand Up @@ -1245,12 +1246,15 @@ def test_setitem_column_update_inplace(self, using_copy_on_write):
df = DataFrame({col: np.zeros(len(labels)) for col in labels}, index=labels)
values = df._mgr.blocks[0].values

for label in df.columns:
df[label][label] = 1

if not using_copy_on_write:
for label in df.columns:
df[label][label] = 1

# diagonal values all updated
assert np.all(values[np.arange(10), np.arange(10)] == 1)
else:
with pytest.raises(ChainedAssignmentError):
for label in df.columns:
df[label][label] = 1
# original dataframe not updated
assert np.all(values[np.arange(10), np.arange(10)] == 0)
8 changes: 6 additions & 2 deletions pandas/tests/frame/indexing/test_xs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
import numpy as np
import pytest

from pandas.errors import SettingWithCopyError
from pandas.errors import (
ChainedAssignmentError,
SettingWithCopyError,
)

from pandas import (
DataFrame,
Expand Down Expand Up @@ -124,7 +127,8 @@ def test_xs_view(self, using_array_manager, using_copy_on_write):
df_orig = dm.copy()

if using_copy_on_write:
dm.xs(2)[:] = 20
with pytest.raises(ChainedAssignmentError):
dm.xs(2)[:] = 20
tm.assert_frame_equal(dm, df_orig)
elif using_array_manager:
# INFO(ArrayManager) with ArrayManager getting a row as a view is
Expand Down
11 changes: 9 additions & 2 deletions pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
import numpy as np
import pytest

from pandas.errors import PerformanceWarning
from pandas.errors import (
ChainedAssignmentError,
PerformanceWarning,
)
import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -340,7 +343,11 @@ def test_stale_cached_series_bug_473(self, using_copy_on_write):
)
repr(Y)
Y["e"] = Y["e"].astype("object")
Y["g"]["c"] = np.NaN
if using_copy_on_write:
with pytest.raises(ChainedAssignmentError):
Y["g"]["c"] = np.NaN
else:
Y["g"]["c"] = np.NaN
repr(Y)
result = Y.sum() # noqa
exp = Y["g"].sum() # noqa
Expand Down
13 changes: 9 additions & 4 deletions pandas/tests/indexing/multiindex/test_chaining_and_caching.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import numpy as np
import pytest

from pandas.errors import SettingWithCopyError
from pandas.errors import (
ChainedAssignmentError,
SettingWithCopyError,
)
import pandas.util._test_decorators as td

from pandas import (
Expand Down Expand Up @@ -50,11 +53,13 @@ def test_cache_updating(using_copy_on_write):

# setting via chained assignment
# but actually works, since everything is a view
df.loc[0]["z"].iloc[0] = 1.0
result = df.loc[(0, 0), "z"]
if using_copy_on_write:
assert result == df_original.loc[0, "z"]
with pytest.raises(ChainedAssignmentError):
df.loc[0]["z"].iloc[0] = 1.0
assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"]
else:
df.loc[0]["z"].iloc[0] = 1.0
result = df.loc[(0, 0), "z"]
assert result == 1

# correct setting
Expand Down
15 changes: 11 additions & 4 deletions pandas/tests/indexing/multiindex/test_partial.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
import pytest

from pandas.errors import ChainedAssignmentError
import pandas.util._test_decorators as td

from pandas import (
Expand Down Expand Up @@ -132,20 +133,26 @@ def test_partial_set(
exp.iloc[65:85] = 0
tm.assert_frame_equal(df, exp)

df["A"].loc[2000, 4] = 1
if not using_copy_on_write:
exp["A"].loc[2000, 4].values[:] = 1
if using_copy_on_write:
with pytest.raises(ChainedAssignmentError):
df["A"].loc[2000, 4] = 1
df.loc[(2000, 4), "A"] = 1
else:
df["A"].loc[2000, 4] = 1
exp.iloc[65:85, 0] = 1
tm.assert_frame_equal(df, exp)

df.loc[2000] = 5
exp.iloc[:100] = 5
tm.assert_frame_equal(df, exp)

# this works...for now
df["A"].iloc[14] = 5
if using_copy_on_write:
with pytest.raises(ChainedAssignmentError):
df["A"].iloc[14] = 5
df["A"].iloc[14] == exp["A"].iloc[14]
else:
df["A"].iloc[14] = 5
assert df["A"].iloc[14] == 5

@pytest.mark.parametrize("dtype", [int, float])
Expand Down
12 changes: 8 additions & 4 deletions pandas/tests/indexing/multiindex/test_setitem.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import numpy as np
import pytest

from pandas.errors import SettingWithCopyError
from pandas.errors import (
ChainedAssignmentError,
SettingWithCopyError,
)
import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -501,8 +504,8 @@ def test_frame_setitem_copy_raises(
# will raise/warn as its chained assignment
df = multiindex_dataframe_random_data.T
if using_copy_on_write:
# TODO(CoW) it would be nice if this could still warn/raise
df["foo"]["one"] = 2
with pytest.raises(ChainedAssignmentError):
df["foo"]["one"] = 2
else:
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(SettingWithCopyError, match=msg):
Expand All @@ -516,7 +519,8 @@ def test_frame_setitem_copy_no_write(
expected = frame
df = frame.copy()
if using_copy_on_write:
df["foo"]["one"] = 2
with pytest.raises(ChainedAssignmentError):
df["foo"]["one"] = 2
else:
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(SettingWithCopyError, match=msg):
Expand Down
Loading