Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,4 @@ repos:
hooks:
- id: codespell
additional_dependencies:
- tomli
- tomli
17 changes: 16 additions & 1 deletion pandera/api/ibis/components.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Core Ibis schema component specifications."""

import logging
from typing import Any, Optional, Type

import ibis
Expand All @@ -11,6 +12,8 @@
from pandera.engines import ibis_engine
from pandera.utils import is_regex

logger = logging.getLogger(__name__)


class Column(ComponentSchema[ibis.Table]):
"""Validate types and properties of table columns."""
Expand Down Expand Up @@ -97,7 +100,7 @@ def __init__(
self.regex = regex
self.name = name

# self.set_regex() # TODO(deepyaman): Implement method.
self.set_regex()

# pylint: disable=unused-argument
@staticmethod
Expand All @@ -118,10 +121,22 @@ def selector(self):
return f"^{self.name}$"
return self.name

def set_regex(self):
    """Turn on regex matching when the column name is a regex pattern.

    Returns immediately when the column has no name. Otherwise, when
    ``is_regex`` reports the name as a regex expression and ``regex`` is
    not already truthy, ``regex`` is set to ``True`` and the change is
    logged at INFO level.
    """
    if self.name is None:
        return

    if is_regex(self.name) and not self.regex:
        # Hand the name to the logger as an argument so the message is
        # only formatted when an INFO record is actually emitted.
        logger.info(
            "Column schema '%s' is a regex expression. "
            "Setting regex=True.",
            self.name,
        )
        self.regex = True

def set_name(self, name: str):
    """Assign a new name to this column object and refresh its regex flag.

    :param str name: the name to give the column object
    :returns: this same column object
    """
    self.name = name
    # Re-run the regex detection against the freshly assigned name.
    self.set_regex()
    return self
107 changes: 67 additions & 40 deletions pandera/backends/ibis/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

from __future__ import annotations

from typing import TYPE_CHECKING, List, Optional, cast
from typing import TYPE_CHECKING, Iterable, List, Optional, cast

import ibis
import ibis.selectors as s

from pandera.api.base.error_handler import ErrorHandler
from pandera.backends.base import CoreCheckResult
Expand Down Expand Up @@ -36,45 +37,68 @@
"""Validation backend implementation for Ibis table columns."""
error_handler = ErrorHandler(lazy)

# TODO(deepyaman): subsample the check object if head, tail, or sample are specified
sample = check_obj[schema.name]

# run the checks
core_checks = [
self.check_dtype,
self.run_checks,
]

args = (sample, schema)
for check in core_checks:
results = check(*args)
if isinstance(results, CoreCheckResult):
results = [results]

for result in results:
if result.passed:
continue
# Why cast `results` only in components.py, not in container.py?
results = cast(List[CoreCheckResult], results)
if result.schema_error is not None:
error = result.schema_error
else:
error = SchemaError(
schema=schema,
data=check_obj,
message=result.message,
failure_cases=result.failure_cases,
check=result.check,
check_index=result.check_index,
check_output=result.check_output,
reason_code=result.reason_code,
)
error_handler.collect_error( # Why indent (unlike in container.py)?
validation_type(result.reason_code),
result.reason_code,
error,
original_exc=result.original_exc,
)
def validate_column(check_obj, column_name):
    """Run the core checks against a single column of ``check_obj``.

    The enclosing ``schema`` component is temporarily pointed at
    ``column_name`` with its regex flag disabled. Both attributes are
    restored afterwards, including when an exception propagates out of
    a check or out of the error collection.

    :param check_obj: object indexed by ``column_name`` to obtain the
        column to validate
    :param column_name: key of the column to validate
    """
    # make sure the schema component mutations are reverted after
    # validation
    _orig_name = schema.name
    _orig_regex = schema.regex

    try:
        # set the column name and regex flag for a single column
        schema.name = column_name
        schema.regex = False

        # TODO(deepyaman): subsample the check object if head, tail, or sample are specified
        sample = check_obj[column_name]

        # run the checks
        core_checks = [
            self.check_dtype,
            self.run_checks,
        ]

        args = (sample, schema)
        for check in core_checks:
            results = check(*args)
            if isinstance(results, CoreCheckResult):
                results = [results]
            # Why cast `results` only in components.py, not in container.py?
            # (the cast is a typing aid only; it does nothing at runtime)
            results = cast(List[CoreCheckResult], results)

            for result in results:
                if result.passed:
                    continue
                if result.schema_error is not None:
                    error = result.schema_error
                else:
                    error = SchemaError(
                        schema=schema,
                        data=check_obj,
                        message=result.message,
                        failure_cases=result.failure_cases,
                        check=result.check,
                        check_index=result.check_index,
                        check_output=result.check_output,
                        reason_code=result.reason_code,
                    )
                error_handler.collect_error(  # Why indent (unlike in container.py)?
                    validation_type(result.reason_code),
                    result.reason_code,
                    error,
                    original_exc=result.original_exc,
                )
    finally:
        # revert the schema component mutations, even if validation raised,
        # so the caller's schema is never left renamed or with regex off
        schema.name = _orig_name
        schema.regex = _orig_regex

column_keys_to_check = (
self.get_regex_columns(schema, check_obj)
if schema.regex
else [schema.name]
)

for column_name in column_keys_to_check:
validate_column(check_obj, column_name)

if lazy and error_handler.collected_errors:
raise SchemaErrors(
Expand All @@ -85,6 +109,9 @@

return check_obj

def get_regex_columns(self, schema, check_obj) -> Iterable:
    """Return the columns of ``check_obj`` selected by the schema's pattern.

    :param schema: column schema whose ``selector`` is used as the pattern
    :param check_obj: object to select the matching columns from
    :returns: the ``columns`` attribute of the resulting selection
    """
    pattern = schema.selector
    matched = check_obj.select(s.matches(pattern))
    return matched.columns

@validate_scope(scope=ValidationScope.SCHEMA)
def check_dtype(
self, check_obj: ibis.Column, schema: Column
Expand Down
13 changes: 11 additions & 2 deletions pandera/backends/ibis/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from typing import TYPE_CHECKING, Any, Iterable, List, Optional

import ibis
import ibis.selectors as s
from ibis.common.exceptions import IbisError

from pandera.api.base.error_handler import ErrorHandler
from pandera.config import ValidationScope
Expand Down Expand Up @@ -203,7 +205,7 @@
):
absent_column_names.append(col_name)

if col_schema.regex: # TODO(deepyaman): Implement functionality.
if col_schema.regex:
try:
column_names.extend(
col_schema.get_backend(check_obj).get_regex_columns(
Expand Down Expand Up @@ -291,7 +293,14 @@
if column_info.absent_column_names and not schema.add_missing_columns:
for colname in column_info.absent_column_names:
if is_regex(colname):
continue # TODO(deepyaman): Support regex colnames.
try:

Check warning on line 296 in pandera/backends/ibis/container.py

View check run for this annotation

Codecov / codecov/patch

pandera/backends/ibis/container.py#L296

Added line #L296 was not covered by tests
# don't raise an error if the column schema name is a
# regex pattern
check_obj.select(s.matches(colname))
continue
except IbisError:

Check warning on line 301 in pandera/backends/ibis/container.py

View check run for this annotation

Codecov / codecov/patch

pandera/backends/ibis/container.py#L299-L301

Added lines #L299 - L301 were not covered by tests
# regex pattern didn't match any columns
pass

Check warning on line 303 in pandera/backends/ibis/container.py

View check run for this annotation

Codecov / codecov/patch

pandera/backends/ibis/container.py#L303

Added line #L303 was not covered by tests
results.append(
CoreCheckResult(
passed=False,
Expand Down
4 changes: 3 additions & 1 deletion pandera/engines/ibis_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class DataType(dtypes.DataType):
type: Any = dataclasses.field(repr=False, init=False)
"""Native Ibis dtype boxed by the data type."""

def __init__(self, dtype: Any):
def __init__(self, dtype: Optional[Any] = None):
super().__init__()
object.__setattr__(self, "type", ibis.dtype(dtype))
dtype_cls = dtype if inspect.isclass(dtype) else dtype.__class__
Expand Down Expand Up @@ -220,6 +220,8 @@ class UInt64(DataType, dtypes.UInt64):
class Float32(DataType, dtypes.Float32):
"""Semantic representation of a :class:`dt.Float32`."""

type = dt.float32


@Engine.register_dtype(
equivalents=[
Expand Down
2 changes: 1 addition & 1 deletion tests/ibis/test_ibis_builtin_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1367,7 +1367,7 @@ def pytest_generate_tests(self, metafunc):
)

def get_data_param(self):
"""Generate the params which will be used to test this function. All the accpetable
"""Generate the params which will be used to test this function. All the acceptable
data types would be tested"""
return {
"test_unique_values_eq_check": [
Expand Down
Loading