Skip to content
37 changes: 31 additions & 6 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2245,18 +2245,34 @@ def _get_reconciled_name_object(self, other):
return self._shallow_copy(name=name)
return self

def union(self, other, sort=True):
def union(self, other, sort=None):
"""
Form the union of two Index objects.

Parameters
----------
other : Index or array-like
sort : bool, default True
Sort the resulting index if possible
sort : bool or None, default None
Whether to sort the resulting Index.

* None : Sort the result, except when

1. `self` and `other` are equal.
2. `self` or `other` has length 0.
3. Some values in `self` or `other` cannot be compared.
A RuntimeWarning is issued in this case.

* True : sort the result. A TypeError is raised when the
values cannot be compared.
* False : do not sort the result.

.. versionadded:: 0.24.0

.. versionchanged:: 0.24.0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be "0.24.1"


Changed the default `sort` to None, matching the
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this being changed? This is certainly not a regression; it was already the default behavior.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be clear: no behaviour is changed. It was indeed the default, it stays the default. It's only the value that encodes the default that is changed (True -> None), so that True can mean something else (=always sort).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, maybe that should be made clearer in the docstring.

behavior of pandas 0.23.4 and earlier.

Returns
-------
union : Index
Expand All @@ -2273,10 +2289,16 @@ def union(self, other, sort=True):
other = ensure_index(other)

if len(other) == 0 or self.equals(other):
return self._get_reconciled_name_object(other)
result = self._get_reconciled_name_object(other)
if sort:
result = result.sort_values()
return result

if len(self) == 0:
return other._get_reconciled_name_object(self)
result = other._get_reconciled_name_object(self)
if sort:
result = result.sort_values()
return result

# TODO: is_dtype_union_equal is a hack around
# 1. buggy set ops with duplicates (GH #13432)
Expand Down Expand Up @@ -2319,13 +2341,16 @@ def union(self, other, sort=True):
else:
result = lvals

if sort:
if sort is None:
try:
result = sorting.safe_sort(result)
except TypeError as e:
warnings.warn("{}, sort order is undefined for "
"incomparable objects".format(e),
RuntimeWarning, stacklevel=3)
elif sort:
# raise if not sortable.
result = sorting.safe_sort(result)

# for subclasses
return self._wrap_setop_result(other, result)
Expand Down
46 changes: 46 additions & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,52 @@ def test_union(self, sort):
tm.assert_index_equal(union, everything.sort_values())
assert tm.equalContents(union, everything)

def test_union_sort_other_equal(self):
    """Check ``Index.union`` with an equal operand for each ``sort`` value.

    The index is deliberately unsorted so that a sorted result can be
    told apart from one that merely preserves the input order.
    """
    unsorted = pd.Index([1, 0, 2])
    in_order = pd.Index([0, 1, 2])

    # default, sort=None: the operand comes back in its original order
    tm.assert_index_equal(unsorted.union(unsorted), unsorted)

    # sort=True: the result is sorted even though both sides are equal
    tm.assert_index_equal(unsorted.union(unsorted, sort=True), in_order)

    # sort=False: original order is kept
    tm.assert_index_equal(unsorted.union(unsorted, sort=False), unsorted)

def test_union_sort_other_empty(self):
    """Check ``Index.union`` when one operand is empty, for each ``sort``.

    Both operand orders are exercised (non-empty with empty, and empty
    with non-empty) against the same expected result.
    """
    idx = pd.Index([1, 0, 2])
    empty = idx[:0]
    in_order = pd.Index([0, 1, 2])

    # (keyword arguments passed to ``union``, expected result)
    scenarios = [
        ({}, idx),                    # default, sort=None
        ({"sort": True}, in_order),   # sort=True
        ({"sort": False}, idx),       # sort=False
    ]
    for kwargs, expected in scenarios:
        tm.assert_index_equal(idx.union(empty, **kwargs), expected)
        tm.assert_index_equal(empty.union(idx, **kwargs), expected)

def test_union_sort_other_incomparable(self):
    """Check ``Index.union`` on an int/Timestamp index for each ``sort``.

    The index mixes an integer with a ``Timestamp``; the other operand
    is its one-element prefix.
    """
    mixed = pd.Index([1, pd.Timestamp('2000')])
    head = mixed[:1]

    # default, sort=None: a RuntimeWarning is expected and the result
    # must equal the original index
    with tm.assert_produces_warning(RuntimeWarning):
        unioned = mixed.union(head)
    tm.assert_index_equal(unioned, mixed)

    # sort=True: a TypeError is expected
    with pytest.raises(TypeError, match='.*'):
        mixed.union(head, sort=True)

    # sort=False: the result must equal the original index
    tm.assert_index_equal(mixed.union(head, sort=False), mixed)

@pytest.mark.parametrize("klass", [
np.array, Series, list])
@pytest.mark.parametrize("sort", [True, False])
Expand Down