Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1136,6 +1136,7 @@ Deprecations
- :func:`pandas.types.is_datetimetz` is deprecated in favor of `pandas.types.is_datetime64tz` (:issue:`23917`)
- Creating a :class:`TimedeltaIndex` or :class:`DatetimeIndex` by passing range arguments `start`, `end`, and `periods` is deprecated in favor of :func:`timedelta_range` and :func:`date_range` (:issue:`23919`)
- Passing a string alias like ``'datetime64[ns, UTC]'`` as the `unit` parameter to :class:`DatetimeTZDtype` is deprecated. Use :class:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`).
- The default value of the ``skipna`` parameter of :func:`~pandas.api.types.infer_dtype` will change to ``True`` in a future version of pandas (:issue:`17066`, :issue:`24050`)

.. _whatsnew_0240.deprecations.datetimelike_int_ops:

Expand Down
23 changes: 15 additions & 8 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ from fractions import Fraction
from numbers import Number

import sys
import warnings

import cython
from cython import Py_ssize_t
Expand Down Expand Up @@ -622,7 +623,7 @@ def clean_index_list(obj: list):
return obj, all_arrays

# don't force numpy coerce with nan's
inferred = infer_dtype(obj)
inferred = infer_dtype(obj, skipna=False)
if inferred in ['string', 'bytes', 'unicode', 'mixed', 'mixed-integer']:
return np.asarray(obj, dtype=object), 0
elif inferred in ['integer']:
Expand Down Expand Up @@ -1078,17 +1079,16 @@ cdef _try_infer_map(v):
return None


def infer_dtype(value: object, skipna: bool=False) -> str:
def infer_dtype(value: object, skipna: object=None) -> str:
"""
Efficiently infer the type of a passed val, or list-like
array of values. Return a string describing the type.

Parameters
----------
value : scalar, list, ndarray, or pandas type
skipna : bool, default False
Ignore NaN values when inferring the type. The default of ``False``
will be deprecated in a later version of pandas.
skipna : bool, default None
Ignore NaN values when inferring the type.

.. versionadded:: 0.21.0

Expand Down Expand Up @@ -1185,6 +1185,12 @@ def infer_dtype(value: object, skipna: bool=False) -> str:
bint seen_pdnat = False
bint seen_val = False

if skipna is None:
msg = ('A future version of pandas will default to `skipna=True`. To '
'silence this warning, pass `skipna=True|False` explicitly.')
warnings.warn(msg, FutureWarning, stacklevel=2)
skipna = False

if util.is_array(value):
values = value
elif hasattr(value, 'dtype'):
Expand All @@ -1209,6 +1215,10 @@ def infer_dtype(value: object, skipna: bool=False) -> str:
values = construct_1d_object_array_from_listlike(value)

values = getattr(values, 'values', values)

# make contiguous
values = values.ravel()

if skipna:
values = values[~isnaobj(values)]

Expand All @@ -1219,9 +1229,6 @@ def infer_dtype(value: object, skipna: bool=False) -> str:
if values.dtype != np.object_:
values = values.astype('O')

# make contiguous
values = values.ravel()

n = len(values)
if n == 0:
return 'empty'
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def _ensure_arraylike(values):
ensure that we are arraylike if not already
"""
if not is_array_like(values):
inferred = lib.infer_dtype(values)
inferred = lib.infer_dtype(values, skipna=True)
if inferred in ['mixed', 'string', 'unicode']:
if isinstance(values, tuple):
values = list(values)
Expand Down Expand Up @@ -203,7 +203,7 @@ def _get_hashtable_algo(values):
if ndtype == 'object':

# it's cheaper to use a String Hash Table than Object
if lib.infer_dtype(values) in ['string']:
if lib.infer_dtype(values, skipna=False) in ['string']:
ndtype = 'string'
else:
ndtype = 'object'
Expand All @@ -221,7 +221,7 @@ def _get_data_algo(values, func_map):
if ndtype == 'object':

# it's cheaper to use a String Hash Table than Object
if lib.infer_dtype(values) in ['string']:
if lib.infer_dtype(values, skipna=False) in ['string']:
ndtype = 'string'

f = func_map.get(ndtype, func_map['object'])
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1482,7 +1482,7 @@ def sequence_to_dt64ns(data, dtype=None, copy=False,
# TODO: We do not have tests specific to string-dtypes,
# also complex or categorical or other extension
copy = False
if lib.infer_dtype(data) == 'integer':
if lib.infer_dtype(data, skipna=True) == 'integer':
data = data.astype(np.int64)
else:
# data comes back here as either i8 to denote UTC timestamps
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,8 @@ def coerce_to_array(values, dtype, mask=None, copy=False):

values = np.array(values, copy=copy)
if is_object_dtype(values):
inferred_type = lib.infer_dtype(values)
if inferred_type is 'mixed' and isna(values).all():
inferred_type = lib.infer_dtype(values, skipna=True)
if inferred_type == 'empty':
values = np.empty(len(values))
values.fill(np.nan)
elif inferred_type not in ['floating', 'integer',
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ def __floordiv__(self, other):
elif is_object_dtype(other):
result = [self[n] // other[n] for n in range(len(self))]
result = np.array(result)
if lib.infer_dtype(result) == 'timedelta':
if lib.infer_dtype(result, skipna=True) == 'timedelta':
result, _ = sequence_to_td64ns(result)
return type(self)(result)
return result
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ def trans(x):

if isinstance(dtype, string_types):
if dtype == 'infer':
inferred_type = lib.infer_dtype(ensure_object(result.ravel()))
inferred_type = lib.infer_dtype(ensure_object(result.ravel()),
skipna=True)
if inferred_type == 'boolean':
dtype = 'bool'
elif inferred_type == 'integer':
Expand Down Expand Up @@ -458,7 +459,7 @@ def infer_dtype_from_array(arr, pandas_dtype=False):
return arr.dtype, np.asarray(arr)

# don't force numpy coerce with nan's
inferred = lib.infer_dtype(arr)
inferred = lib.infer_dtype(arr, skipna=True)
if inferred in ['string', 'bytes', 'unicode',
'mixed', 'mixed-integer']:
return (np.object_, arr)
Expand Down Expand Up @@ -940,7 +941,7 @@ def try_timedelta(v):
# e.g. '00:00:01' is a timedelta but
# technically is also a datetime
value = try_timedelta(v)
if lib.infer_dtype(value) in ['mixed']:
if lib.infer_dtype(value, skipna=False) in ['mixed']:
value = try_datetime(v)

return value
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -704,7 +704,8 @@ def is_datetime_arraylike(arr):
if isinstance(arr, ABCDatetimeIndex):
return True
elif isinstance(arr, (np.ndarray, ABCSeries)):
return arr.dtype == object and lib.infer_dtype(arr) == 'datetime'
return (arr.dtype == object
and lib.infer_dtype(arr, skipna=True) == 'datetime')
return getattr(arr, 'inferred_type', None) == 'datetime'


Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ def _infer_fill_value(val):
if is_datetimelike(val):
return np.array('NaT', dtype=val.dtype)
elif is_object_dtype(val.dtype):
dtype = lib.infer_dtype(ensure_object(val))
dtype = lib.infer_dtype(ensure_object(val), skipna=True)
if dtype in ['datetime', 'datetime64']:
return np.array('NaT', dtype=_NS_DTYPE)
elif dtype in ['timedelta', 'timedelta64']:
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
# should not be coerced
# GH 11836
if is_integer_dtype(dtype):
inferred = lib.infer_dtype(data)
inferred = lib.infer_dtype(data, skipna=True)
if inferred == 'integer':
data = maybe_cast_to_integer_array(data, dtype,
copy=copy)
Expand Down Expand Up @@ -371,7 +371,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
else:
data = data.astype(dtype)
elif is_float_dtype(dtype):
inferred = lib.infer_dtype(data)
inferred = lib.infer_dtype(data, skipna=True)
if inferred == 'string':
pass
else:
Expand Down Expand Up @@ -409,7 +409,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
subarr = subarr.copy()

if dtype is None:
inferred = lib.infer_dtype(subarr)
inferred = lib.infer_dtype(subarr, skipna=False)
if inferred == 'integer':
try:
return cls._try_convert_to_int_index(
Expand Down Expand Up @@ -1731,7 +1731,7 @@ def inferred_type(self):
"""
Return a string of the type inferred from the values.
"""
return lib.infer_dtype(self)
return lib.infer_dtype(self, skipna=True)

@cache_readonly
def is_all_dates(self):
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2233,7 +2233,8 @@ def _partial_tup_index(self, tup, side='left'):
section = labs[start:end]

if lab not in lev:
if not lev.is_type_compatible(lib.infer_dtype([lab])):
if not lev.is_type_compatible(lib.infer_dtype([lab],
skipna=True)):
raise TypeError('Level type mismatch: %s' % lab)

# short circuit
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,7 @@ def sanitize_array(data, index, dtype=None, copy=False,
subarr = np.array(data, dtype=object, copy=copy)

if is_object_dtype(subarr.dtype) and dtype != 'object':
inferred = lib.infer_dtype(subarr)
inferred = lib.infer_dtype(subarr, skipna=True)
if inferred == 'period':
try:
subarr = period_array(subarr)
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,7 +942,8 @@ def _maybe_coerce_merge_keys(self):
'representation', UserWarning)

# let's infer and see if we are ok
elif lib.infer_dtype(lk) == lib.infer_dtype(rk):
elif (lib.infer_dtype(lk, skipna=True)
== lib.infer_dtype(rk, skipna=True)):
pass

# Check if we are trying to merge on obviously
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ def _convert_bin_to_numeric_type(bins, dtype):
------
ValueError if bins are not of a compat dtype to dtype
"""
bins_dtype = infer_dtype(bins)
bins_dtype = infer_dtype(bins, skipna=True)
if is_timedelta64_dtype(dtype):
if bins_dtype in ['timedelta', 'timedelta64']:
bins = to_timedelta(bins).view(np.int64)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -869,7 +869,7 @@ def _get_with(self, key):
if isinstance(key, Index):
key_type = key.inferred_type
else:
key_type = lib.infer_dtype(key)
key_type = lib.infer_dtype(key, skipna=True)

if key_type == 'integer':
if self.index.is_integer() or self.index.is_floating():
Expand Down Expand Up @@ -1006,7 +1006,7 @@ def _set_with(self, key, value):
if isinstance(key, Index):
key_type = key.inferred_type
else:
key_type = lib.infer_dtype(key)
key_type = lib.infer_dtype(key, skipna=True)

if key_type == 'integer':
if self.index.inferred_type == 'integer':
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ def sort_mixed(values):
return np.concatenate([nums, np.asarray(strs, dtype=object)])

sorter = None
if PY3 and lib.infer_dtype(values) == 'mixed-integer':
if PY3 and lib.infer_dtype(values, skipna=True) == 'mixed-integer':
# unorderable in py3 if mixed str/int
ordered = sort_mixed(values)
else:
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1299,7 +1299,7 @@ def _validate_usecols_arg(usecols):
elif not is_list_like(usecols):
raise ValueError(msg)
else:
usecols_dtype = lib.infer_dtype(usecols)
usecols_dtype = lib.infer_dtype(usecols, skipna=True)
if usecols_dtype not in ('empty', 'integer',
'string', 'unicode'):
raise ValueError(msg)
Expand Down
12 changes: 6 additions & 6 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1948,7 +1948,7 @@ def set_atom(self, block, block_items, existing_col, min_itemsize,
return self.set_atom_complex(block)

dtype = block.dtype.name
inferred_type = lib.infer_dtype(block.values)
inferred_type = lib.infer_dtype(block.values, skipna=True)

if inferred_type == 'date':
raise TypeError(
Expand Down Expand Up @@ -1994,15 +1994,15 @@ def set_atom_string(self, block, block_items, existing_col, min_itemsize,
data = block.values

# see if we have a valid string type
inferred_type = lib.infer_dtype(data.ravel())
inferred_type = lib.infer_dtype(data.ravel(), skipna=True)
if inferred_type != 'string':

# we cannot serialize this data, so report an exception on a column
# by column basis
for i, item in enumerate(block_items):

col = block.iget(i)
inferred_type = lib.infer_dtype(col.ravel())
inferred_type = lib.infer_dtype(col.ravel(), skipna=True)
if inferred_type != 'string':
raise TypeError(
"Cannot serialize the column [%s] because\n"
Expand Down Expand Up @@ -2739,7 +2739,7 @@ def write_array(self, key, value, items=None):

# infer the type, warn if we have a non-string type here (for
# performance)
inferred_type = lib.infer_dtype(value.ravel())
inferred_type = lib.infer_dtype(value.ravel(), skipna=True)
if empty_array:
pass
elif inferred_type == 'string':
Expand Down Expand Up @@ -4506,7 +4506,7 @@ def _convert_index(index, encoding=None, errors='strict', format_type=None):
if isinstance(index, MultiIndex):
raise TypeError('MultiIndex not supported here!')

inferred_type = lib.infer_dtype(index)
inferred_type = lib.infer_dtype(index, skipna=True)

values = np.asarray(index)

Expand Down Expand Up @@ -4739,7 +4739,7 @@ def __init__(self, table, where=None, start=None, stop=None):

# see if we have a passed coordinate like
try:
inferred = lib.infer_dtype(where)
inferred = lib.infer_dtype(where, skipna=True)
if inferred == 'integer' or inferred == 'boolean':
where = np.asarray(where)
if where.dtype == np.bool_:
Expand Down
23 changes: 7 additions & 16 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -819,27 +819,15 @@ def _harmonize_columns(self, parse_dates=None):
except KeyError:
pass # this column not in results

def _get_notna_col_dtype(self, col):
"""
Infer datatype of the Series col. In case the dtype of col is 'object'
and it contains NA values, this infers the datatype of the not-NA
values. Needed for inserting typed data containing NULLs, GH8778.
"""
col_for_inference = col
if col.dtype == 'object':
notnadata = col[~isna(col)]
if len(notnadata):
col_for_inference = notnadata

return lib.infer_dtype(col_for_inference)

def _sqlalchemy_type(self, col):

dtype = self.dtype or {}
if col.name in dtype:
return self.dtype[col.name]

col_type = self._get_notna_col_dtype(col)
# Infer type of column, while ignoring missing values.
# Needed for inserting typed data containing NULLs, GH 8778.
col_type = lib.infer_dtype(col, skipna=True)

from sqlalchemy.types import (BigInteger, Integer, Float,
Text, Boolean,
Expand Down Expand Up @@ -1325,7 +1313,10 @@ def _sql_type_name(self, col):
if col.name in dtype:
return dtype[col.name]

col_type = self._get_notna_col_dtype(col)
# Infer type of column, while ignoring missing values.
# Needed for inserting typed data containing NULLs, GH 8778.
col_type = lib.infer_dtype(col, skipna=True)

if col_type == 'timedelta64':
warnings.warn("the 'timedelta' type is not supported, and will be "
"written as integer values (ns frequency) to the "
Expand Down
Loading