diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 3e559e771f126..a4c5f1bdb439d 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -273,7 +273,7 @@ Datetimelike - Bug in :class:`DataFrame` and :class:`Series` where timezone aware data with ``dtype='datetime64[ns]`` was not cast to naive (:issue:`25843`) - Improved :class:`Timestamp` type checking in various datetime functions to prevent exceptions when using a subclassed ``datetime`` (:issue:`25851`) - Bug in :class:`Series` and :class:`DataFrame` repr where ``np.datetime64('NaT')`` and ``np.timedelta64('NaT')`` with ``dtype=object`` would be represented as ``NaN`` (:issue:`25445`) -- +- Bug in :func:`to_datetime` where invalid arguments were not replaced with ``NaT`` when ``errors='coerce'`` was passed (:issue:`26122`) Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 50e3fb1c38cc7..83e803aa9bf4a 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -682,7 +682,8 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', return ignore_errors_out_of_bounds_fallback(values), tz_out except TypeError: - return array_to_datetime_object(values, is_raise, dayfirst, yearfirst) + return array_to_datetime_object(values, errors, + dayfirst, yearfirst) if seen_datetime and seen_integer: # we have mixed datetimes & integers @@ -697,7 +698,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', elif is_raise: raise ValueError("mixed datetimes and integers in passed array") else: - return array_to_datetime_object(values, is_raise, + return array_to_datetime_object(values, errors, dayfirst, yearfirst) if seen_datetime_offset and not utc_convert: @@ -709,7 +710,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', # (with individual dateutil.tzoffsets) are returned is_same_offsets = len(out_tzoffset_vals) == 1 if not is_same_offsets: - return 
array_to_datetime_object(values, is_raise, + return array_to_datetime_object(values, errors, dayfirst, yearfirst) else: tz_offset = out_tzoffset_vals.pop() @@ -757,7 +758,7 @@ cdef inline ignore_errors_out_of_bounds_fallback(ndarray[object] values): @cython.wraparound(False) @cython.boundscheck(False) -cdef array_to_datetime_object(ndarray[object] values, bint is_raise, +cdef array_to_datetime_object(ndarray[object] values, str errors, bint dayfirst=False, bint yearfirst=False): """ Fall back function for array_to_datetime @@ -769,7 +770,7 @@ cdef array_to_datetime_object(ndarray[object] values, bint is_raise, ---------- values : ndarray of object date-like objects to convert - is_raise : bool + errors : {'raise', 'ignore', 'coerce'} error behavior when parsing dayfirst : bool, default False dayfirst parsing behavior when encountering datetime strings @@ -783,9 +784,14 @@ cdef array_to_datetime_object(ndarray[object] values, bint is_raise, cdef: Py_ssize_t i, n = len(values) object val, + bint is_ignore = errors == 'ignore' + bint is_coerce = errors == 'coerce' + bint is_raise = errors == 'raise' ndarray[object] oresult npy_datetimestruct dts + assert is_raise or is_ignore or is_coerce + oresult = np.empty(n, dtype=object) # We return an object array and only attempt to parse: @@ -806,6 +812,9 @@ cdef array_to_datetime_object(ndarray[object] values, bint is_raise, pydatetime_to_dt64(oresult[i], &dts) check_dts_bounds(&dts) except (ValueError, OverflowError): + if is_coerce: + oresult[i] = NaT + continue if is_raise: raise return values, None diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index d6627f0fb8b72..fea2f1e9f3ef2 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -712,6 +712,19 @@ def test_week_without_day_and_calendar_year(self, date, format): with pytest.raises(ValueError, match=msg): pd.to_datetime(date, format=format) + def 
test_to_datetime_coerce(self): + # GH 26122 + ts_strings = ['March 1, 2018 12:00:00+0400', + 'March 1, 2018 12:00:00+0500', + '20100240'] + result = to_datetime(ts_strings, errors='coerce') + expected = Index([datetime(2018, 3, 1, 12, 0, + tzinfo=tzoffset(None, 14400)), + datetime(2018, 3, 1, 12, 0, + tzinfo=tzoffset(None, 18000)), + NaT]) + tm.assert_index_equal(result, expected) + def test_iso_8601_strings_with_same_offset(self): # GH 17697, 11736 ts_str = "2015-11-18 15:30:00+05:30"