Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandera/backends/polars/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def failure_cases_metadata(
column=pl.lit(err.schema.name),
check=pl.lit(check_identifier),
check_number=pl.lit(err.check_index),
index=index,
index=index.limit(failure_cases_df.shape[0]),
).cast(
{
"failure_case": pl.Utf8,
Expand Down
4 changes: 4 additions & 0 deletions pandera/backends/polars/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ def postprocess_lazyframe_output(

if check_obj.key != "*":
failure_cases = failure_cases.select(check_obj.key)

if self.check.n_failure_cases is not None:
failure_cases = failure_cases.limit(self.check.n_failure_cases)

return CheckResult(
check_output=results,
check_passed=passed,
Expand Down
6 changes: 5 additions & 1 deletion pandera/engines/polars_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,12 @@ def polars_object_coercible(
) -> pl.LazyFrame:
"""Checks whether a polars object is coercible with respect to a type."""
key = data_container.key or "*"

# do a strict cast for list types since is_not_null() cannot correctly
# evaluate null values in lists.
strict = isinstance(type_, pl.List)
coercible = data_container.lazyframe.cast(
{key: type_}, strict=False
{key: type_}, strict=strict
).select(pl.col(key).is_not_null())
# reduce to a single boolean column
return coercible.select(pl.all_horizontal(key).alias(CHECK_OUTPUT_KEY))
Expand Down
28 changes: 28 additions & 0 deletions tests/polars/test_polars_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,3 +232,31 @@ def custom_check(data: pa.PolarsData) -> pl.LazyFrame:

with pytest.raises(pa.errors.SchemaError):
schema.validate(invalid_lf)


def test_polars_column_check_n_failure_cases(column_lf):
    """Check that a column-level check limits reported failure cases.

    A ``pa.Check`` built with ``n_failure_cases=2`` is attached to column
    ``"col"``; lazy validation is expected to raise ``SchemaErrors`` whose
    ``failure_cases`` frame has exactly that many rows.
    """
    n_failure_cases = 2
    check = pa.Check(
        lambda data: data.lazyframe.select(pl.col("*").lt(0)),
        n_failure_cases=n_failure_cases,
    )
    schema = pa.DataFrameSchema({"col": pa.Column(checks=check)})

    # Use pytest.raises rather than try/except: with a bare try/except the
    # assertion is skipped (and the test passes vacuously) whenever
    # validation does not raise at all.
    # NOTE(review): assumes the ``column_lf`` fixture holds at least
    # ``n_failure_cases`` values that fail the ``< 0`` check -- confirm
    # against the fixture definition.
    with pytest.raises(pa.errors.SchemaErrors) as exc_info:
        schema.validate(column_lf, lazy=True)

    assert exc_info.value.failure_cases.shape[0] == n_failure_cases


def test_polars_dataframe_check_n_failure_cases(lf):
    """Check that a dataframe-level check limits reported failure cases.

    A ``pa.Check`` built with ``n_failure_cases=2`` is registered as a
    schema-wide check; lazy validation is expected to raise ``SchemaErrors``
    whose ``failure_cases`` frame has exactly that many rows.
    """
    n_failure_cases = 2
    check = pa.Check(
        lambda data: data.lazyframe.select(pl.col("*").lt(0)),
        n_failure_cases=n_failure_cases,
    )
    schema = pa.DataFrameSchema(checks=check)

    # Use pytest.raises rather than try/except: with a bare try/except the
    # assertion is skipped (and the test passes vacuously) whenever
    # validation does not raise at all.
    # NOTE(review): assumes the ``lf`` fixture holds values that fail the
    # ``< 0`` check -- confirm against the fixture definition.
    with pytest.raises(pa.errors.SchemaErrors) as exc_info:
        schema.validate(lf, lazy=True)

    assert exc_info.value.failure_cases.shape[0] == n_failure_cases