Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,4 @@ Performance Improvements

Bug Fixes
~~~~~~~~~
- Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single-index frame by a column and the index (:issue:`14327`)
53 changes: 31 additions & 22 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2201,36 +2201,45 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
raise AssertionError('Level %s not in index' % str(level))
level = index.names.index(level)

inds = index.labels[level]
level_index = index.levels[level]

if self.name is None:
self.name = index.names[level]

# XXX complete hack
if isinstance(index, MultiIndex):
inds = index.labels[level]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be a private method on Index instead (and overridden in MultiIndex)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean everything inside the if isinstance(index, MultiIndex): block? If so then it looks like the MultiIndex override would need to input grouper, index, and level and return a tuple of labels, level_index, and grouper. This seems a little messy to me since the parent Index method would have no use for the level parameter and would need to return None for the labels and level_index values.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just take args and return a tuple of things
no state is kept

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it, thanks for the clarification

level_index = index.levels[level]

if grouper is not None:
level_values = index.levels[level].take(inds)
self.grouper = level_values.map(self.grouper)
else:
# all levels may not be observed
labels, uniques = algos.factorize(inds, sort=True)
# XXX complete hack

if grouper is not None:
level_values = index.levels[level].take(inds)
self.grouper = level_values.map(self.grouper)
else:
# all levels may not be observed
labels, uniques = algos.factorize(inds, sort=True)

if len(uniques) > 0 and uniques[0] == -1:
# handle NAs
mask = inds != -1
ok_labels, uniques = algos.factorize(inds[mask], sort=True)
if len(uniques) > 0 and uniques[0] == -1:
# handle NAs
mask = inds != -1
ok_labels, uniques = algos.factorize(inds[mask],
sort=True)

labels = np.empty(len(inds), dtype=inds.dtype)
labels[mask] = ok_labels
labels[~mask] = -1
labels = np.empty(len(inds), dtype=inds.dtype)
labels[mask] = ok_labels
labels[~mask] = -1

if len(uniques) < len(level_index):
level_index = level_index.take(uniques)
if len(uniques) < len(level_index):
level_index = level_index.take(uniques)

self._labels = labels
self._group_index = level_index
self.grouper = level_index.take(labels)

# Single level index passed
else:
# Use single level index as grouper if none passed
if grouper is None:
self.grouper = index

self._labels = labels
self._group_index = level_index
self.grouper = level_index.take(labels)
else:
if isinstance(self.grouper, (list, tuple)):
self.grouper = com._asarray_tuplesafe(self.grouper)
Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,28 @@ def test_grouper_creation_bug(self):
expected = s.groupby(level='one').sum()
assert_series_equal(result, expected)

def test_grouper_column_and_index(self):
# GH 14327

# Grouping a multi-index frame by a column and an index level should
# be equivalent to resetting the index and grouping by two columns
idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3),
('b', 1), ('b', 2), ('b', 3)])
idx.names = ['outer', 'inner']
df_multi = pd.DataFrame({"A": np.arange(6),
'B': ['one', 'one', 'two',
'two', 'one', 'one']},
index=idx)
result = df_multi.groupby(['B', pd.Grouper(level='inner')]).mean()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you try with these reversed as well, e.g. [pd.Grouper(level='inner'), 'B']

expected = df_multi.reset_index().groupby(['B', 'inner']).mean()
assert_frame_equal(result, expected)

# Grouping a single-index frame by a column and the index should
# be equivalent to resetting the index and grouping by two columns
df_single = df_multi.reset_index('outer')
result = df_single.groupby(['B', pd.Grouper(level='inner')]).mean()
assert_frame_equal(result, expected)

def test_grouper_getting_correct_binner(self):

# GH 10063
Expand Down