Skip to content

Commit 863fb52

Browse files
committed
Merge remote-tracking branch 'upstream/master' into fstring-core-reshape
2 parents 3aa1ffc + 71b7868 commit 863fb52

File tree

293 files changed

+1777
-1513
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

293 files changed

+1777
-1513
lines changed

.github/FUNDING.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
custom: https://pandas.pydata.org/donate.html
2+
github: [numfocus]
23
tidelift: pypi/pandas

asv_bench/benchmarks/gil.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def wrapper(fname):
3737
return wrapper
3838

3939

40-
from .pandas_vb_common import BaseIO # noqa: E402 isort:skip
40+
from .pandas_vb_common import BaseIO # isort:skip
4141

4242

4343
class ParallelGroupbyMethods:

asv_bench/benchmarks/offset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import pandas as pd
44

55
try:
6-
import pandas.tseries.holiday # noqa
6+
import pandas.tseries.holiday
77
except ImportError:
88
pass
99

ci/code_checks.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
5656
black --version
5757

5858
MSG='Checking black formatting' ; echo $MSG
59-
black . --check --exclude '(asv_bench/env|\.egg|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist|setup.py)'
59+
black . --check
6060
RET=$(($RET + $?)) ; echo $MSG "DONE"
6161

6262
# `setup.cfg` contains the list of error codes that are being ignored in flake8
@@ -190,6 +190,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
190190
invgrep -R --include="*.rst" ".. ipython ::" doc/source
191191
RET=$(($RET + $?)) ; echo $MSG "DONE"
192192

193+
    MSG='Check for extra blank lines after the class definition' ; echo $MSG
194+
    invgrep -R --include="*.py" --include="*.pyx" -E 'class.*:\n\n( )+"""' .
195+
    RET=$(($RET + $?)) ; echo $MSG "DONE"
196+
193197
MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
194198
set -o pipefail
195199
if [[ "$AZURE" == "true" ]]; then

doc/source/user_guide/integer_na.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ you must explicitly pass the dtype into :meth:`array` or :class:`Series`:
3030

3131
.. ipython:: python
3232
33-
arr = pd.array([1, 2, np.nan], dtype=pd.Int64Dtype())
33+
arr = pd.array([1, 2, None], dtype=pd.Int64Dtype())
3434
arr
3535
3636
Or the string alias ``"Int64"`` (note the capital ``"I"``, to differentiate from
@@ -63,7 +63,7 @@ up with a ``float64`` dtype Series:
6363
pd.Series([1, 2, np.nan])
6464
6565
Operations involving an integer array will behave similar to NumPy arrays.
66-
Missing values will be propagated, and and the data will be coerced to another
66+
Missing values will be propagated, and the data will be coerced to another
6767
dtype if needed.
6868

6969
.. ipython:: python

doc/source/user_guide/io.rst

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4671,7 +4671,6 @@ See the `Full Documentation <https://github.com/wesm/feather>`__.
46714671
Write to a feather file.
46724672

46734673
.. ipython:: python
4674-
:okwarning:
46754674
46764675
df.to_feather('example.feather')
46774676
@@ -4748,7 +4747,6 @@ See the documentation for `pyarrow <https://arrow.apache.org/docs/python/>`__ an
47484747
Write to a parquet file.
47494748

47504749
.. ipython:: python
4751-
:okwarning:
47524750
47534751
df.to_parquet('example_pa.parquet', engine='pyarrow')
47544752
df.to_parquet('example_fp.parquet', engine='fastparquet')
@@ -4765,7 +4763,6 @@ Read from a parquet file.
47654763
Read only certain columns of a parquet file.
47664764

47674765
.. ipython:: python
4768-
:okwarning:
47694766
47704767
result = pd.read_parquet('example_fp.parquet',
47714768
engine='fastparquet', columns=['a', 'b'])
@@ -4788,7 +4785,6 @@ Serializing a ``DataFrame`` to parquet may include the implicit index as one or
47884785
more columns in the output file. Thus, this code:
47894786

47904787
.. ipython:: python
4791-
:okwarning:
47924788
47934789
df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
47944790
df.to_parquet('test.parquet', engine='pyarrow')
@@ -4805,7 +4801,6 @@ If you want to omit a dataframe's indexes when writing, pass ``index=False`` to
48054801
:func:`~pandas.DataFrame.to_parquet`:
48064802

48074803
.. ipython:: python
4808-
:okwarning:
48094804
48104805
df.to_parquet('test.parquet', index=False)
48114806

doc/source/whatsnew/v0.20.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Check the :ref:`API Changes <whatsnew_0200.api_breaking>` and :ref:`deprecations
3333

3434
.. note::
3535

36-
This is a combined release for 0.20.0 and and 0.20.1.
36+
This is a combined release for 0.20.0 and 0.20.1.
3737
Version 0.20.1 contains one additional change for backwards-compatibility with downstream projects using pandas' ``utils`` routines. (:issue:`16250`)
3838

3939
.. contents:: What's new in v0.20.0

doc/source/whatsnew/v1.0.0.rst

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -314,11 +314,11 @@ Datetimelike
314314
- Bug in :func:`pandas.core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`)
315315
- Bug in :func:`pandas._config.localization.get_locales` where the ``locale -a`` encodes the locales list as windows-1252 (:issue:`23638`, :issue:`24760`, :issue:`27368`)
316316
- Bug in :meth:`Series.var` failing to raise ``TypeError`` when called with ``timedelta64[ns]`` dtype (:issue:`28289`)
317-
-
317+
- Bug in :meth:`DatetimeIndex.strftime` and :meth:`Series.dt.strftime` where ``NaT`` was converted to the string ``'NaT'`` instead of ``np.nan`` (:issue:`29578`)
318318

319319
Timedelta
320320
^^^^^^^^^
321-
321+
- Bug in subtracting a :class:`TimedeltaIndex` or :class:`TimedeltaArray` from a ``np.datetime64`` object (:issue:`29558`)
322322
-
323323
-
324324

@@ -400,6 +400,8 @@ I/O
400400
- Bug in :meth:`DataFrame.to_html` when using ``formatters=<list>`` and ``max_cols`` together. (:issue:`25955`)
401401
- Bug in :meth:`Styler.background_gradient` not able to work with dtype ``Int64`` (:issue:`28869`)
402402
- Bug in :meth:`DataFrame.to_clipboard` which did not work reliably in ipython (:issue:`22707`)
403+
- Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`)
404+
-
403405

404406
Plotting
405407
^^^^^^^^

pandas/_libs/indexing.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ cdef class _NDFrameIndexerBase:
1111
self._ndim = None
1212

1313
@property
14-
def ndim(self):
14+
def ndim(self) -> int:
1515
# Delay `ndim` instantiation until required as reading it
1616
# from `obj` isn't entirely cheap.
1717
ndim = self._ndim

pandas/_libs/lib.pyx

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -971,6 +971,7 @@ cdef class Seen:
971971
bint nat_ # seen nat
972972
bint bool_ # seen_bool
973973
bint null_ # seen_null
974+
bint nan_ # seen_np.nan
974975
bint uint_ # seen_uint (unsigned integer)
975976
bint sint_ # seen_sint (signed integer)
976977
bint float_ # seen_float
@@ -995,6 +996,7 @@ cdef class Seen:
995996
self.nat_ = 0
996997
self.bool_ = 0
997998
self.null_ = 0
999+
self.nan_ = 0
9981000
self.uint_ = 0
9991001
self.sint_ = 0
10001002
self.float_ = 0
@@ -1953,10 +1955,37 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
19531955
@cython.wraparound(False)
19541956
def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
19551957
bint safe=0, bint convert_datetime=0,
1956-
bint convert_timedelta=0):
1958+
bint convert_timedelta=0,
1959+
bint convert_to_nullable_integer=0):
19571960
"""
19581961
Type inference function-- convert object array to proper dtype
1962+
1963+
Parameters
1964+
----------
1965+
objects : ndarray
1966+
Array of object elements to convert.
1967+
try_float : bool, default False
1968+
If an array-like object contains only float or NaN values is
1969+
encountered, whether to convert and return an array of float dtype.
1970+
safe : bool, default False
1971+
Whether to upcast numeric type (e.g. int cast to float). If set to
1972+
True, no upcasting will be performed.
1973+
convert_datetime : bool, default False
1974+
If an array-like object contains only datetime values or NaT is
1975+
encountered, whether to convert and return an array of M8[ns] dtype.
1976+
convert_timedelta : bool, default False
1977+
If an array-like object contains only timedelta values or NaT is
1978+
encountered, whether to convert and return an array of m8[ns] dtype.
1979+
convert_to_nullable_integer : bool, default False
1980+
If an array-like object contains only integer values (and NaN) is
1981+
encountered, whether to convert and return an IntegerArray.
1982+
1983+
Returns
1984+
-------
1985+
array : array of converted object values to more specific dtypes if
1986+
applicable
19591987
"""
1988+
19601989
cdef:
19611990
Py_ssize_t i, n
19621991
ndarray[float64_t] floats
@@ -1977,6 +2006,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
19772006
ints = np.empty(n, dtype='i8')
19782007
uints = np.empty(n, dtype='u8')
19792008
bools = np.empty(n, dtype=np.uint8)
2009+
mask = np.full(n, False)
19802010

19812011
if convert_datetime:
19822012
datetimes = np.empty(n, dtype='M8[ns]')
@@ -1994,6 +2024,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
19942024
if val is None:
19952025
seen.null_ = 1
19962026
floats[i] = complexes[i] = fnan
2027+
mask[i] = True
19972028
elif val is NaT:
19982029
seen.nat_ = 1
19992030
if convert_datetime:
@@ -2003,6 +2034,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
20032034
if not (convert_datetime or convert_timedelta):
20042035
seen.object_ = 1
20052036
break
2037+
elif val is np.nan:
2038+
seen.nan_ = 1
2039+
mask[i] = True
2040+
floats[i] = complexes[i] = val
20062041
elif util.is_bool_object(val):
20072042
seen.bool_ = 1
20082043
bools[i] = val
@@ -2084,11 +2119,19 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
20842119

20852120
if not seen.object_:
20862121
if not safe:
2087-
if seen.null_:
2122+
if seen.null_ or seen.nan_:
20882123
if seen.is_float_or_complex:
20892124
if seen.complex_:
20902125
return complexes
2091-
elif seen.float_ or seen.int_:
2126+
elif seen.float_:
2127+
return floats
2128+
elif seen.int_:
2129+
if convert_to_nullable_integer:
2130+
from pandas.core.arrays import IntegerArray
2131+
return IntegerArray(ints, mask)
2132+
else:
2133+
return floats
2134+
elif seen.nan_:
20922135
return floats
20932136
else:
20942137
if not seen.bool_:
@@ -2127,7 +2170,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
21272170
if seen.complex_:
21282171
if not seen.int_:
21292172
return complexes
2130-
elif seen.float_:
2173+
elif seen.float_ or seen.nan_:
21312174
if not seen.int_:
21322175
return floats
21332176
else:
@@ -2151,7 +2194,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
21512194
if seen.complex_:
21522195
if not seen.int_:
21532196
return complexes
2154-
elif seen.float_:
2197+
elif seen.float_ or seen.nan_:
21552198
if not seen.int_:
21562199
return floats
21572200
elif seen.int_:

0 commit comments

Comments
 (0)