Skip to content
Merged
5 changes: 5 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2320,6 +2320,11 @@ def symmetric_difference(self, other, result_name=None):
except TypeError:
pass

# On equal MultiIndexes the difference is empty. Therefore an empty
# MultiIndex is returned GH13490
if self.nlevels > 1 and len(the_diff) == 0:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could use a comment explaining why we need the special case.

return type(self)([[] for _ in range(self.nlevels)],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use self._shallow_copy

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That wouldn't work, self._shallow_copy fails if the_diff is empty. That's why I am returning an empty MI instead.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In [2]: pd.MultiIndex([[], []], [[],[]])
Out[2]: 
MultiIndex(levels=[[], []],
           labels=[[], []])

In [3]: pd.MultiIndex([[], []], [[],[]])._shallow_copy()
Out[3]: 
MultiIndex(levels=[[], []],
           labels=[[], []])

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry I was confusing it with self._shallow_copy_with_infer. But still, if I call self._shallow_copy with the_diff being empty from_tuples gives me TypeError: Cannot infer number of levels from empty list

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback what do you think about a special case in MultiIndex._shallow_copy when values is length 0 (that's what is passed from symmetric_difference)?

    @Appender(_index_shared_docs['_shallow_copy'])
    def _shallow_copy(self, values=None, **kwargs):
        # With no new values there is nothing to rebuild: hand back a view.
        if values is not None:
            # Index-level callers pass ``name``; MultiIndex constructors
            # expect ``names``.
            if 'name' in kwargs:
                kwargs['names'] = kwargs.pop('name', None)
            # discards freq
            kwargs.pop('freq', None)
            if len(values) == 0:
                # from_tuples raises "Cannot infer number of levels from
                # empty list", so build the empty result explicitly: one
                # empty level and one empty label array per level of self.
                # Forward kwargs so that meta-data such as ``names`` is
                # propagated to the empty result as well.
                n_levels = self.nlevels
                return MultiIndex(levels=[[] for _ in range(n_levels)],
                                  labels=[[] for _ in range(n_levels)],
                                  **kwargs)
            return MultiIndex.from_tuples(values, **kwargs)
        return self.view()

this would "work" but I don't know if "array of length 0" means same structure, but empty. Maybe it's ok.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this change should be done in _shallow_copy_with_infer, you need to construct a MI equiv of self[0:0]

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this change should be done in _shallow_copy_with_infer, you need to construct a MI equiv of self[0:0]

This leaves around some levels unfortunately:

In [16]: idx = pd.MultiIndex.from_product([['a', 'b'], ['A', 'B']])

In [17]: idx[0:0]
Out[17]:
MultiIndex(levels=[['a', 'b'], ['A', 'B']],
           labels=[[], []])

In [18]: idx.difference(idx) | idx.difference(idx)
Out[18]:
MultiIndex(levels=[[], []],
           labels=[[], []])

I believe we want Out[18]

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, then I guess you can special case values=None in _shallow_copy to take a different path of construction (IOW don't use the MultiIndex.from_tuples). The crucial point is to propagate the meta-data.

[[] for _ in range(self.nlevels)])
attribs = self._get_attributes_dict()
attribs['name'] = result_name
if 'freq' in attribs:
Expand Down
10 changes: 8 additions & 2 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,6 @@ def test_constructor_ndarray_like(self):
# it should be possible to convert any object that satisfies the numpy
# ndarray interface directly into an Index
class ArrayLike(object):

def __init__(self, array):
self.array = array

Expand Down Expand Up @@ -246,7 +245,6 @@ def test_index_ctor_infer_nan_nat(self):
[np.timedelta64('nat'), np.nan],
[pd.NaT, np.timedelta64('nat')],
[np.timedelta64('nat'), pd.NaT]]:

tm.assert_index_equal(Index(data), exp)
tm.assert_index_equal(Index(np.array(data, dtype=object)), exp)

Expand Down Expand Up @@ -936,6 +934,14 @@ def test_symmetric_difference(self):
assert tm.equalContents(result, expected)
assert result.name == 'new_name'

def test_symmetric_difference_on_equal_multiindex(self):
    # GH13490: the symmetric difference of two equal MultiIndexes must be
    # an empty MultiIndex.
    left = MultiIndex.from_tuples(self.tuples)
    right = MultiIndex.from_tuples(self.tuples)
    empty = MultiIndex(levels=[[], []], labels=[[], []])

    outcome = left.symmetric_difference(right)

    assert tm.equalContents(outcome, empty)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use tm.assert_index_equal

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would the expected value be None then?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why?

tm.assert_index_equal(result, expected) will work; this compares the indexes, including all meta-data.

in fact your example is NOT propagating meta-data (that's what _shallow_copy does and why you need to use it). add some names for the levels and see.


def test_is_numeric(self):
assert not self.dateIndex.is_numeric()
assert not self.strIndex.is_numeric()
Expand Down