pandas-dev · mroeschke · Mar 10, 2023 · Mar 4, 2023 · Mar 4, 2023 · Mar 4, 2023
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -109,6 +109,7 @@ Performance improvements
 - Performance improvement in :meth:`DataFrame.where` when ``cond`` is backed by an extension dtype (:issue:`51574`)
 - Performance improvement in :meth:`read_orc` when reading a remote URI file path. (:issue:`51609`)
 - Performance improvement in :meth:`~arrays.ArrowExtensionArray.isna` when array has zero nulls or is all nulls (:issue:`51630`)
+- Performance improvement in :meth:`Series.combine_first` (:issue:`51777`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_210.bug_fixes:
@@ -195,7 +196,7 @@ Groupby/resample/rolling
 
 Reshaping
 ^^^^^^^^^
--
+- Bug in :meth:`Series.combine_first` converting ``int64`` dtype to ``float64`` and losing precision on very large integers (:issue:`51764`)
 -
 
 Sparse

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -3264,13 +3264,23 @@ def combine_first(self, other) -> Series:
         falcon      NaN
         dtype: float64
         """
+        from pandas.core.reshape.concat import concat
+
         new_index = self.index.union(other.index)
-        this = self.reindex(new_index, copy=False)
-        other = other.reindex(new_index, copy=False)
+
+        this = self
+        null_mask = isna(this)
+        if null_mask.any():
+            drop = this.index[null_mask].intersection(other.index[notna(other)])
+            if len(drop):
+                this = this.drop(drop)
+
+        other = other.reindex(other.index.difference(this.index), copy=False)
         if this.dtype.kind == "M" and other.dtype.kind != "M":
             other = to_datetime(other)
-
-        return this.where(notna(this), other)
+        combined = concat([this, other])
+        combined = combined.reindex(new_index, copy=False)
+        return combined.__finalize__(self, method="combine_first")
 
     def update(self, other: Series | Sequence | Mapping) -> None:
         """

diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py
@@ -327,13 +327,6 @@ def test_where_series(self, data, na_value):
         self.assert_series_equal(result, expected)
 
     def test_combine_first(self, data, request):
-        if data.dtype.subtype == "int":
-            # Right now this is upcasted to float, just like combine_first
-            # for Series[int]
-            mark = pytest.mark.xfail(
-                reason="TODO(SparseArray.__setitem__) will preserve dtype."
-            )
-            request.node.add_marker(mark)
         super().test_combine_first(data)
 
     def test_searchsorted(self, data_for_sorting, as_series):

diff --git a/pandas/tests/series/methods/test_combine_first.py b/pandas/tests/series/methods/test_combine_first.py
@@ -112,3 +112,11 @@ def test_combine_first_timezone_series_with_empty_series(self):
         s2 = Series(index=time_index)
         result = s1.combine_first(s2)
         tm.assert_series_equal(result, s1)
+
+    def test_combine_first_preserves_dtype(self):
+        # GH51764
+        s1 = Series([np.iinfo(np.int64).min, np.iinfo(np.int64).max])
+        s2 = Series([1, 2, 3])
+        result = s1.combine_first(s2)
+        expected = Series([np.iinfo(np.int64).min, np.iinfo(np.int64).max, 3])
+        tm.assert_series_equal(result, expected)