Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
4330984
Fix iloc setitem with duplicate columns for scalar case
jbrockmendel Feb 28, 2020
620c5b9
Fix setitem in more cases
jbrockmendel Feb 28, 2020
08f9df3
BUG: fix setitem_with_indexer
jbrockmendel Feb 29, 2020
d21d92d
Merge branch 'master' of https://github.com/pandas-dev/pandas into se…
jbrockmendel Feb 29, 2020
5b4711d
comment, no copy needed
jbrockmendel Feb 29, 2020
13d3c19
Merge branch 'master' of https://github.com/pandas-dev/pandas into se…
jbrockmendel Feb 29, 2020
ed9bc77
Merge branch 'master' of https://github.com/pandas-dev/pandas into se…
jbrockmendel Mar 3, 2020
e686fe2
Merge branch 'master' of https://github.com/pandas-dev/pandas into se…
jbrockmendel Mar 3, 2020
e5e470e
Merge branch 'master' of https://github.com/pandas-dev/pandas into se…
jbrockmendel Mar 3, 2020
9063047
update check
jbrockmendel Mar 3, 2020
e74f7ce
Merge branch 'master' of https://github.com/pandas-dev/pandas into se…
jbrockmendel Mar 4, 2020
8a797be
Merge branch 'master' of https://github.com/pandas-dev/pandas into se…
jbrockmendel Mar 5, 2020
280418e
comment
jbrockmendel Mar 6, 2020
4a847e5
whatsnew
jbrockmendel Mar 6, 2020
540d3ea
fix test on 32bit platform
jbrockmendel Mar 6, 2020
b75cf9f
Merge branch 'master' of https://github.com/pandas-dev/pandas into se…
jbrockmendel Mar 9, 2020
3a17ba8
comments
jbrockmendel Mar 9, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ Indexing
- Bug in :meth:`Series.xs` incorrectly returning ``Timestamp`` instead of ``datetime64`` in some object-dtype cases (:issue:`31630`)
- Bug in :meth:`DataFrame.iat` incorrectly returning ``Timestamp`` instead of ``datetime`` in some object-dtype cases (:issue:`32809`)
- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` when indexing with an integer key on a object-dtype :class:`Index` that is not all-integers (:issue:`31905`)
-
- Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`)

Missing
^^^^^^^
Expand Down
14 changes: 14 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2706,6 +2706,20 @@ def _setitem_frame(self, key, value):
self._check_setitem_copy()
self._where(-key, value, inplace=True)

def _iset_item(self, loc: int, value):
self._ensure_valid_index(value)

# technically _sanitize_column expects a label, not a position,
# but the behavior is the same as long as we pass broadcast=False
value = self._sanitize_column(loc, value, broadcast=False)
NDFrame._iset_item(self, loc, value)

# check if we are modifying a copy
# try to set first as we want an invalid
# value exception to occur first
if len(self):
self._check_setitem_copy()

def _set_item(self, key, value):
"""
Add series to DataFrame in specified column.
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3579,6 +3579,10 @@ def _slice(self: FrameOrSeries, slobj: slice, axis=0) -> FrameOrSeries:
result._set_is_copy(self, copy=is_copy)
return result

def _iset_item(self, loc: int, value) -> None:
self._data.iset(loc, value)
self._clear_item_cache()

def _set_item(self, key, value) -> None:
self._data.set(key, value)
self._clear_item_cache()
Expand Down
44 changes: 27 additions & 17 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1615,6 +1615,12 @@ def _setitem_with_indexer(self, indexer, value):
info_idx = [info_idx]
labels = item_labels[info_idx]

# Ensure we have something we can iterate over
ilocs = info_idx
if isinstance(info_idx, slice):
ri = Index(range(len(self.obj.columns)))
ilocs = ri[info_idx]

plane_indexer = indexer[:1]
lplane_indexer = length_of_indexer(plane_indexer[0], self.obj.index)
# lplane_indexer gives the expected length of obj[indexer[0]]
Expand All @@ -1632,9 +1638,11 @@ def _setitem_with_indexer(self, indexer, value):
"length than the value"
)

def setter(item, v):
ser = self.obj[item]
pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer

def isetter(loc, v):
# positional setting on column loc
ser = self.obj._ixs(loc, axis=1)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you comment that this is not a positional setter


# perform the equivalent of a setitem on the info axis
# as we have a null slice or a slice with full bounds
Expand All @@ -1654,7 +1662,7 @@ def setter(item, v):
ser._maybe_update_cacher(clear=True)

# reset the sliced object if unique
self.obj[item] = ser
self.obj._iset_item(loc, ser)

# we need an iterable, with a ndim of at least 1
# eg. don't pass through np.array(0)
Expand All @@ -1664,8 +1672,10 @@ def setter(item, v):
if isinstance(value, ABCDataFrame):
sub_indexer = list(indexer)
multiindex_indexer = isinstance(labels, ABCMultiIndex)
# TODO: we are implicitly assuming value.columns is unique

for item in labels:
for loc in ilocs:
item = item_labels[loc]
if item in value:
sub_indexer[info_axis] = item
v = self._align_series(
Expand All @@ -1674,7 +1684,7 @@ def setter(item, v):
else:
v = np.nan

setter(item, v)
isetter(loc, v)

# we have an equal len ndarray/convertible to our labels
# hasattr first, to avoid coercing to ndarray without reason.
Expand All @@ -1685,44 +1695,44 @@ def setter(item, v):
# note that this coerces the dtype if we are mixed
# GH 7551
value = np.array(value, dtype=object)
if len(labels) != value.shape[1]:
if len(ilocs) != value.shape[1]:
raise ValueError(
"Must have equal len keys and value "
"when setting with an ndarray"
)

for i, item in enumerate(labels):

for i, loc in enumerate(ilocs):
# setting with a list, re-coerces
setter(item, value[:, i].tolist())
isetter(loc, value[:, i].tolist())

elif (
len(labels) == 1
and lplane_indexer == len(value)
and not is_scalar(plane_indexer[0])
):
# we have an equal len list/ndarray
setter(labels[0], value)
# We only get here with len(labels) == len(ilocs) == 1
isetter(ilocs[0], value)

elif lplane_indexer == 0 and len(value) == len(self.obj.index):
# We get here in one case via .loc with a all-False mask
pass

else:
# per-label values
if len(labels) != len(value):
if len(ilocs) != len(value):
raise ValueError(
"Must have equal len keys and value "
"when setting with an iterable"
)

for item, v in zip(labels, value):
setter(item, v)
for loc, v in zip(ilocs, value):
isetter(loc, v)
else:

# scalar
for item in labels:
setter(item, value)
# scalar value
for loc in ilocs:
isetter(loc, value)

else:
if isinstance(indexer, tuple):
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1086,7 +1086,10 @@ def value_getitem(placement):
"Shape of new values must be compatible with manager shape"
)

if isinstance(loc, int):
if lib.is_integer(loc):
# We have 6 tests where loc is _not_ an int.
# In this case, get_blkno_placements will yield only one tuple,
# containing (self._blknos[loc], BlockPlacement(slice(0, 1, 1)))
loc = [loc]

# Accessing public blknos ensures the public versions are initialized
Expand Down Expand Up @@ -1138,7 +1141,7 @@ def value_getitem(placement):
# one item.
new_blocks.extend(
make_block(
values=value.copy(),
values=value,
ndim=self.ndim,
placement=slice(mgr_loc, mgr_loc + 1),
)
Expand Down
30 changes: 29 additions & 1 deletion pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,6 @@ def test_iloc_setitem_dups(self):
df = concat([df1, df2], axis=1)

expected = df.fillna(3)
expected["A"] = expected["A"].astype("float64")
inds = np.isnan(df.iloc[:, 0])
mask = inds[inds].index
df.iloc[mask, 0] = df.iloc[mask, 2]
Expand Down Expand Up @@ -694,3 +693,32 @@ def test_series_indexing_zerodim_np_array(self):
s = Series([1, 2])
result = s.iloc[np.array(0)]
assert result == 1


class TestILocSetItemDuplicateColumns:
def test_iloc_setitem_scalar_duplicate_columns(self):
# GH#15686, duplicate columns and mixed dtype
df1 = pd.DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}])
df2 = pd.DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}])
df = pd.concat([df1, df2], axis=1)
df.iloc[0, 0] = -1

assert df.iloc[0, 0] == -1
assert df.iloc[0, 2] == 3
assert df.dtypes.iloc[2] == np.int64

def test_iloc_setitem_list_duplicate_columns(self):
# GH#22036 setting with same-sized list
df = pd.DataFrame([[0, "str", "str2"]], columns=["a", "b", "b"])

df.iloc[:, 2] = ["str3"]

expected = pd.DataFrame([[0, "str", "str3"]], columns=["a", "b", "b"])
tm.assert_frame_equal(df, expected)

def test_iloc_setitem_series_duplicate_columns(self):
df = pd.DataFrame(
np.arange(8, dtype=np.int64).reshape(2, 4), columns=["A", "B", "A", "B"]
)
df.iloc[:, 0] = df.iloc[:, 0].astype(np.float64)
assert df.dtypes.iloc[2] == np.int64