Skip to content

Commit 88bb609

Browse files
authored
drop_invalid_rows still raises schema-level errors (#2000)
* drop_invalid_rows still raises schema-level errors

  Signed-off-by: cosmicBboy <[email protected]>

* update drop invalid rows docs

  Signed-off-by: cosmicBboy <[email protected]>

---------

Signed-off-by: cosmicBboy <[email protected]>
1 parent 80a04a6 commit 88bb609

File tree

4 files changed

+25
-25
lines changed

4 files changed

+25
-25
lines changed

.pylintrc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,5 +57,6 @@ disable=
5757
multiple-statements,
5858
cyclic-import,
5959
too-many-positional-arguments,
60+
too-many-function-args,
6061
# Due to custom `immutable` decorator replacing `dataclasses.dataclass`
6162
invalid-field-call

docs/source/drop_invalid_rows.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ import pandas as pd
3232
import pandera.pandas as pa
3333
3434
35-
df = pd.DataFrame({"counter": ["1", "2", "3"]})
35+
df = pd.DataFrame({"counter": [1, 2, 3]})
3636
schema = pa.DataFrameSchema(
3737
{"counter": pa.Column(int, checks=[pa.Check(lambda x: x >= 3)])},
3838
drop_invalid_rows=True,
@@ -48,7 +48,7 @@ import pandas as pd
4848
import pandera.pandas as pa
4949
5050
51-
series = pd.Series(["1", "2", "3"])
51+
series = pd.Series([1, 2, 3])
5252
schema = pa.SeriesSchema(
5353
int,
5454
checks=[pa.Check(lambda x: x >= 3)],
@@ -65,7 +65,7 @@ import pandas as pd
6565
import pandera.pandas as pa
6666
6767
68-
df = pd.DataFrame({"counter": ["1", "2", "3"]})
68+
df = pd.DataFrame({"counter": [1, 2, 3]})
6969
schema = pa.Column(
7070
int,
7171
name="counter",

pandera/backends/pandas/base.py

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -189,27 +189,16 @@ def defaultdict_to_dict(d):
189189

190190
def drop_invalid_rows(self, check_obj, error_handler: ErrorHandler):
191191
"""Remove invalid elements in a check obj according to failures caught by the error handler."""
192-
errors = error_handler.schema_errors
193-
for err in errors:
192+
for err in error_handler.schema_errors:
193+
if isinstance(err.failure_cases, str):
194+
# if the failure cases are a string, it means the error is
195+
# a schema-level error.
196+
continue
194197
if isinstance(check_obj.index, pd.MultiIndex):
195-
# MultiIndex values are saved on the error as strings so need to be cast back
196-
# to their original types
197-
if isinstance(err.failure_cases, str):
198-
# string type failure cases indicates that the entire column/dataframe
199-
# being checked didn't pass validation, meaning the entire dataframe
200-
# is invalid.
201-
index_values = check_obj.index
202-
else:
203-
index_tuples = err.failure_cases["index"].apply(eval)
204-
index_values = pd.MultiIndex.from_tuples(index_tuples)
198+
index_tuples = err.failure_cases["index"].apply(eval)
199+
index_values = pd.MultiIndex.from_tuples(index_tuples)
205200
else:
206-
if isinstance(err.failure_cases, str):
207-
# string type failure cases indicates that the entire column/dataframe
208-
# being checked didn't pass validation, meaning the entire dataframe
209-
# is invalid.
210-
index_values = check_obj.index
211-
else:
212-
index_values = err.failure_cases["index"]
201+
index_values = err.failure_cases["index"]
213202

214203
mask = ~check_obj.index.isin(index_values)
215204
check_obj = check_obj.loc[mask]

pandera/backends/pandas/container.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,18 @@ def validate(
116116

117117
if error_handler.collected_errors:
118118
if getattr(schema, "drop_invalid_rows", False):
119+
# if the failure cases are a string, it means the error is
120+
# a schema-level error.
121+
if any(
122+
isinstance(err.failure_cases, str)
123+
for err in error_handler.schema_errors
124+
):
125+
raise SchemaErrors(
126+
schema=schema,
127+
schema_errors=error_handler.schema_errors,
128+
data=check_obj,
129+
)
119130
check_obj = self.drop_invalid_rows(check_obj, error_handler)
120-
return check_obj
121131
else:
122132
raise SchemaErrors(
123133
schema=schema,
@@ -129,7 +139,7 @@ def validate(
129139

130140
def run_checks_and_handle_errors(
131141
self,
132-
error_handler,
142+
error_handler: ErrorHandler,
133143
schema,
134144
check_obj,
135145
column_info,
@@ -139,7 +149,7 @@ def run_checks_and_handle_errors(
139149
head,
140150
tail,
141151
random_state,
142-
):
152+
) -> ErrorHandler:
143153
"""Run checks on schema"""
144154
# pylint: disable=too-many-locals
145155

0 commit comments

Comments
 (0)