Skip to content

Commit 5ec4611

Browse files
Jean-Francois Zinque authored and cosmicBboy committed
Fix remaining unrecognized numpy dtypes (#637)
* add test for all pandas-compatible numpy dtypes
* add support for np.bytes_
* add support for rare object aliases
* add support for platform-specific numpy dtypes
1 parent 96d6516 commit 5ec4611

File tree

3 files changed

+42
-1
lines changed

3 files changed

+42
-1
lines changed

pandera/engines/numpy_engine.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,17 @@ class Complex64(Complex128):
313313
bit_width: int = 64
314314

315315

316+
###############################################################################
317+
# bytes
318+
###############################################################################
319+
320+
321+
@Engine.register_dtype(
    equivalents=[
        "bytes",
        bytes,
        np.bytes_,
    ]
)
@immutable
class Bytes(DataType):
    """Data type wrapping numpy's ``bytes`` dtype.

    Registered with the engine so that the string alias ``"bytes"``, the
    builtin ``bytes`` and ``np.bytes_`` are all listed as equivalents of
    this class.
    """

    # The underlying numpy dtype this class stands for.
    type = np.dtype("bytes")
325+
326+
316327
###############################################################################
317328
# string
318329
###############################################################################

pandera/engines/pandas_engine.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,10 @@ def dtype(cls, data_type: Any) -> "DataType":
137137
# into a numpy or pandas dtype.
138138
np_or_pd_dtype = pd.api.types.pandas_dtype(data_type)
139139
if isinstance(np_or_pd_dtype, np.dtype):
140-
np_or_pd_dtype = np_or_pd_dtype.type
140+
# cast alias to platform-agnostic dtype
141+
# e.g.: np.intc -> np.int32
142+
common_np_dtype = np.dtype(np_or_pd_dtype.name)
143+
np_or_pd_dtype = common_np_dtype.type
141144

142145
return engine.Engine.dtype(cls, np_or_pd_dtype)
143146

@@ -454,13 +457,18 @@ def check(self, pandera_dtype: dtypes.DataType) -> bool:
454457
numpy_engine.Object,
455458
equivalents=[
456459
"object",
460+
"object_",
461+
"object0",
457462
"O",
458463
"bytes",
459464
"decimal",
460465
"mixed-integer",
461466
"mixed",
467+
"bytes",
468+
bytes,
462469
object,
463470
np.object_,
471+
np.bytes_,
464472
],
465473
)
466474

tests/core/test_dtypes.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,28 @@ def test_default_numeric_dtypes():
397397
)
398398

399399

400+
@pytest.mark.parametrize(
    "alias, np_dtype",
    [
        (alias, np_dtype)
        for alias, np_dtype in np.sctypeDict.items()
        # Skip void, and skip "int"/"uint": their bit width differs
        # between pandas and numpy.
        if not (np_dtype == np.void or alias in ("int", "uint"))
    ],
)
def test_numpy_dtypes(alias, np_dtype):
    """Check that the pandas engine resolves every numpy scalar-type entry.

    When ``alias`` is something ``np.dtype`` accepts, the engine must map the
    alias and the numpy type to equal data types. Otherwise only the numpy
    type itself is required to resolve.
    """
    # Probe whether numpy itself recognizes the alias.
    try:
        np.dtype(alias)
        alias_is_valid = True
    except TypeError:
        alias_is_valid = False

    if not alias_is_valid:
        # Not a valid alias: only the numpy type needs to be understood.
        assert pandas_engine.Engine.dtype(np_dtype)
        return

    resolved_from_alias = pandas_engine.Engine.dtype(alias)
    resolved_from_type = pandas_engine.Engine.dtype(np_dtype)
    assert resolved_from_alias == resolved_from_type
420+
421+
400422
@pytest.mark.parametrize(
401423
"examples",
402424
[

0 commit comments

Comments
 (0)