Skip to content
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ Other API changes
- Changed behavior of :class:`Index` constructor with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`)
- :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default) will now set the index on the returned :class:`DataFrame` to a :class:`RangeIndex` instead of a :class:`Int64Index` (:issue:`49745`)
- Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`)
- Changed behavior of :class:`Series` and :class:`DataFrame` constructors when given an integer dtype and floating-point data containing values that are not round numbers; this now raises ``ValueError`` instead of silently retaining the float dtype. Do ``Series(data).astype(dtype)`` or ``DataFrame(data).astype(dtype)`` to get the old behavior (:issue:`49599`)
- Changed behavior of :meth:`DataFrame.shift` with ``axis=1``, an integer ``fill_value``, and homogeneous datetime-like dtype, this now fills new columns with integer dtypes instead of casting to datetimelike (:issue:`49842`)
- :meth:`DataFrame.values`, :meth:`DataFrame.to_numpy`, :meth:`DataFrame.xs`, :meth:`DataFrame.reindex`, :meth:`DataFrame.fillna`, and :meth:`DataFrame.replace` no longer silently consolidate the underlying arrays; do ``df = df.copy()`` to ensure consolidation (:issue:`49356`)
-
Expand Down
55 changes: 3 additions & 52 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
DtypeObj,
T,
)
from pandas.errors import IntCastingNaNError

from pandas.core.dtypes.base import (
ExtensionDtype,
Expand All @@ -46,7 +45,6 @@
is_datetime64_ns_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_float_dtype,
is_integer_dtype,
is_list_like,
is_object_dtype,
Expand Down Expand Up @@ -503,7 +501,6 @@ def sanitize_array(
copy: bool = False,
*,
allow_2d: bool = False,
strict_ints: bool = False,
) -> ArrayLike:
"""
Sanitize input data to an ndarray or ExtensionArray, copy if specified,
Expand All @@ -517,8 +514,6 @@ def sanitize_array(
copy : bool, default False
allow_2d : bool, default False
If False, raise if we have a 2D Arraylike.
strict_ints : bool, default False
If False, silently ignore failures to cast float data to int dtype.

Returns
-------
Expand Down Expand Up @@ -571,32 +566,7 @@ def sanitize_array(
if isinstance(data, np.matrix):
data = data.A

if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype):
# possibility of nan -> garbage
try:
# GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int
# casting aligning with IntCastingNaNError below
with np.errstate(invalid="ignore"):
# GH#15832: Check if we are requesting a numeric dtype and
# that we can convert the data to the requested dtype.
subarr = maybe_cast_to_integer_array(data, dtype)

except IntCastingNaNError:
raise
except ValueError:
# Pre-2.0, we would have different behavior for Series vs DataFrame.
# DataFrame would call np.array(data, dtype=dtype, copy=copy),
# which would cast to the integer dtype even if the cast is lossy.
# See GH#40110.
if strict_ints:
raise

# We ignore the dtype arg and return floating values,
# e.g. test_constructor_floating_data_int_dtype
# TODO: where is the discussion that documents the reason for this?
subarr = np.array(data, copy=copy)

elif dtype is None:
if dtype is None:
subarr = data
if data.dtype == object:
subarr = maybe_infer_to_datetimelike(data)
Expand Down Expand Up @@ -629,27 +599,8 @@ def sanitize_array(
subarr = np.array([], dtype=np.float64)

elif dtype is not None:
try:
subarr = _try_cast(data, dtype, copy)
except ValueError:
if is_integer_dtype(dtype):
if strict_ints:
raise
casted = np.array(data, copy=False)
if casted.dtype.kind == "f":
# GH#40110 match the behavior we have if we passed
# a ndarray[float] to begin with
return sanitize_array(
casted,
index,
dtype,
copy=False,
allow_2d=allow_2d,
)
else:
raise
else:
raise
subarr = _try_cast(data, dtype, copy)

else:
subarr = maybe_convert_platform(data)
if subarr.dtype == object:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def __new__(
data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

try:
arr = sanitize_array(data, None, dtype=dtype, copy=copy, strict_ints=True)
arr = sanitize_array(data, None, dtype=dtype, copy=copy)
except ValueError as err:
if "index must be specified when data is not list-like" in str(err):
raise cls._raise_scalar_data_error(data) from err
Expand Down
11 changes: 6 additions & 5 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2690,11 +2690,12 @@ def test_floating_values_integer_dtype(self):

arr = np.random.randn(10, 5)

# as of 2.0, we match Series behavior by retaining float dtype instead
# of doing a lossy conversion here. Below we _do_ do the conversion
# since it is lossless.
df = DataFrame(arr, dtype="i8")
assert (df.dtypes == "f8").all()
# GH#49599 in 2.0 we raise instead of either
# a) silently ignoring dtype and returning float (the old Series behavior) or
# b) rounding (the old DataFrame behavior)
msg = "Trying to coerce float values to integers"
with pytest.raises(ValueError, match=msg):
DataFrame(arr, dtype="i8")

df = DataFrame(arr.round(), dtype="i8")
assert (df.dtypes == "i8").all()
Expand Down
33 changes: 21 additions & 12 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -795,11 +795,13 @@ def test_constructor_floating_data_int_dtype(self, frame_or_series):
# not clear if this is what we want long-term
expected = frame_or_series(arr)

res = frame_or_series(arr, dtype="i8")
tm.assert_equal(res, expected)
# GH#49599 as of 2.0 we raise instead of silently retaining float dtype
msg = "Trying to coerce float values to integer"
with pytest.raises(ValueError, match=msg):
frame_or_series(arr, dtype="i8")

res = frame_or_series(list(arr), dtype="i8")
tm.assert_equal(res, expected)
with pytest.raises(ValueError, match=msg):
frame_or_series(list(arr), dtype="i8")

# pre-2.0, when we had NaNs, we silently ignored the integer dtype
arr[0] = np.nan
Expand All @@ -809,7 +811,12 @@ def test_constructor_floating_data_int_dtype(self, frame_or_series):
with pytest.raises(IntCastingNaNError, match=msg):
frame_or_series(arr, dtype="i8")

with pytest.raises(IntCastingNaNError, match=msg):
exc = IntCastingNaNError
if frame_or_series is Series:
# TODO: try to align these
exc = ValueError
msg = "cannot convert float NaN to integer"
with pytest.raises(exc, match=msg):
# same behavior if we pass list instead of the ndarray
frame_or_series(list(arr), dtype="i8")

Expand All @@ -827,13 +834,14 @@ def test_constructor_coerce_float_fail(self, any_int_numpy_dtype):
# see gh-15832
# Updated: make sure we treat this list the same as we would treat
# the equivalent ndarray
# GH#49599 pre-2.0 we silently retained float dtype, in 2.0 we raise
vals = [1, 2, 3.5]

res = Series(vals, dtype=any_int_numpy_dtype)
expected = Series(np.array(vals), dtype=any_int_numpy_dtype)
tm.assert_series_equal(res, expected)
alt = Series(np.array(vals)) # i.e. we ignore the dtype kwd
tm.assert_series_equal(alt, expected)
msg = "Trying to coerce float values to integer"
with pytest.raises(ValueError, match=msg):
Series(vals, dtype=any_int_numpy_dtype)
with pytest.raises(ValueError, match=msg):
Series(np.array(vals), dtype=any_int_numpy_dtype)

def test_constructor_coerce_float_valid(self, float_numpy_dtype):
s = Series([1, 2, 3.5], dtype=float_numpy_dtype)
Expand All @@ -847,9 +855,10 @@ def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtyp
vals = [1, 2, np.nan]
# pre-2.0 this would return with a float dtype, in 2.0 we raise

msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
with pytest.raises(IntCastingNaNError, match=msg):
msg = "cannot convert float NaN to integer"
with pytest.raises(ValueError, match=msg):
Series(vals, dtype=any_int_numpy_dtype)
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
with pytest.raises(IntCastingNaNError, match=msg):
Series(np.array(vals), dtype=any_int_numpy_dtype)

Expand Down
7 changes: 4 additions & 3 deletions pandas/tests/test_downstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,10 @@ def test_construct_dask_float_array_int_dtype_match_ndarray():
expected = Series(arr)
tm.assert_series_equal(res, expected)

res = Series(darr, dtype="i8")
expected = Series(arr, dtype="i8")
tm.assert_series_equal(res, expected)
# GH#49599 in 2.0 we raise instead of silently ignoring the dtype
msg = "Trying to coerce float values to integers"
with pytest.raises(ValueError, match=msg):
Series(darr, dtype="i8")

msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
arr[2] = np.nan
Expand Down