
Commit 7a20c7a

update cache dataframe config args, fix tests (#1437)

This PR renames the pandera config arguments introduced in #1414, making the names more generic, and fixes the tests that were broken by the config changes.

Signed-off-by: Niels Bantilan <[email protected]>

1 parent 81bab7d

File tree: 5 files changed, +28 -24 lines

  pandera/backends/pyspark/decorators.py
  pandera/config.py
  tests/core/test_pandas_config.py
  tests/pyspark/test_pyspark_config.py
  tests/pyspark/test_pyspark_decorators.py
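For orientation, here is a minimal sketch of the renamed options in use. The `from pandera.config import CONFIG` import path is assumed from the test modules in this diff; the flag and environment-variable names come straight from the changes below.

```python
import os

# Option 1: environment variables, read once when pandera is first imported.
os.environ["PANDERA_CACHE_DATAFRAME"] = "True"
os.environ["PANDERA_KEEP_CACHED_DATAFRAME"] = "False"

from pandera.config import CONFIG  # noqa: E402  (import after setting env vars)

# Option 2: flip the flags on the global config object at runtime.
CONFIG.cache_dataframe = True        # previously CONFIG.pyspark_cache
CONFIG.keep_cached_dataframe = True  # previously CONFIG.pyspark_keep_cache

print(CONFIG.dict())
```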

pandera/backends/pyspark/decorators.py
Lines changed: 2 additions & 2 deletions

@@ -156,7 +156,7 @@ def _wrapper(func):
         @functools.wraps(func)
         def wrapper(self, *args, **kwargs):
             # Skip if not enabled
-            if CONFIG.pyspark_cache is not True:
+            if CONFIG.cache_dataframe is not True:
                 return func(self, *args, **kwargs)

             check_obj: DataFrame = None
@@ -186,7 +186,7 @@ def cached_check_obj():

                 yield  # Execute the decorated function

-                if not CONFIG.pyspark_keep_cache:
+                if not CONFIG.keep_cached_dataframe:
                     # If not cached, `.unpersist()` does nothing
                     logger.debug("Unpersisting dataframe...")
                     check_obj.unpersist()
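The two renamed flags drive a cache-and-release pattern around validation: cache the dataframe up front, run the checks, then unpersist unless the user asked to keep it. The sketch below is a simplified, hypothetical reconstruction of that pattern, not pandera's actual decorator; only the two `CONFIG` attributes are taken from this diff.

```python
from contextlib import contextmanager

from pyspark.sql import DataFrame

from pandera.config import CONFIG


@contextmanager
def cached_for_validation(check_obj: DataFrame):
    """Cache a dataframe for the duration of a validation run."""
    if CONFIG.cache_dataframe:
        check_obj.cache()
    try:
        yield check_obj  # the decorated validation runs here
    finally:
        if not CONFIG.keep_cached_dataframe:
            # `.unpersist()` is a no-op if the dataframe was never cached
            check_obj.unpersist()
```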

pandera/config.py
Lines changed: 4 additions & 4 deletions

@@ -26,8 +26,8 @@ class PanderaConfig(BaseModel):

     validation_enabled: bool = True
     validation_depth: ValidationDepth = ValidationDepth.SCHEMA_AND_DATA
-    pyspark_cache: bool = False
-    pyspark_keep_cache: bool = False
+    cache_dataframe: bool = False
+    keep_cached_dataframe: bool = False


 # this config variable should be accessible globally
@@ -39,11 +39,11 @@ class PanderaConfig(BaseModel):
     validation_depth=os.environ.get(
         "PANDERA_VALIDATION_DEPTH", ValidationDepth.SCHEMA_AND_DATA
     ),
-    pyspark_cache=os.environ.get(
+    cache_dataframe=os.environ.get(
         "PANDERA_CACHE_DATAFRAME",
         False,
     ),
-    pyspark_keep_cache=os.environ.get(
+    keep_cached_dataframe=os.environ.get(
         "PANDERA_KEEP_CACHED_DATAFRAME",
         False,
     ),
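One subtlety in the initializer above: `os.environ.get` returns a string whenever the variable is set, so the conversion to `bool` is left to pydantic's field coercion. The standalone model below is a hypothetical stand-in mirroring the two new fields, not pandera's actual class, and illustrates the coercion this relies on.

```python
from pydantic import BaseModel


class CacheFlags(BaseModel):
    """Hypothetical stand-in for the two new PanderaConfig fields."""

    cache_dataframe: bool = False
    keep_cached_dataframe: bool = False


# pydantic coerces recognized strings to bool, so env-var values work:
print(CacheFlags(cache_dataframe="True").cache_dataframe)           # True
print(CacheFlags(keep_cached_dataframe="0").keep_cached_dataframe)  # False
```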

tests/core/test_pandas_config.py
Lines changed: 4 additions & 0 deletions

@@ -44,6 +44,8 @@ class TestSchema(DataFrameModel):
             price_val: int = pa.Field()

         expected = {
+            "cache_dataframe": False,
+            "keep_cached_dataframe": False,
             "validation_enabled": False,
             "validation_depth": ValidationDepth.SCHEMA_AND_DATA,
         }
@@ -61,6 +63,8 @@ class TestPandasSeriesConfig:
     def test_disable_validation(self, disable_validation):
         """This function validates that a none object is loaded if validation is disabled"""
         expected = {
+            "cache_dataframe": False,
+            "keep_cached_dataframe": False,
             "validation_enabled": False,
             "validation_depth": ValidationDepth.SCHEMA_AND_DATA,
         }

tests/pyspark/test_pyspark_config.py
Lines changed: 13 additions & 13 deletions

@@ -42,8 +42,8 @@ class TestSchema(DataFrameModel):
         expected = {
             "validation_enabled": False,
             "validation_depth": ValidationDepth.SCHEMA_AND_DATA,
-            "pyspark_cache": False,
-            "pyspark_keep_cache": False,
+            "cache_dataframe": False,
+            "keep_cached_dataframe": False,
         }

         assert CONFIG.dict() == expected
@@ -66,8 +66,8 @@ def test_schema_only(self, spark, sample_spark_schema):
         expected = {
             "validation_enabled": True,
             "validation_depth": ValidationDepth.SCHEMA_ONLY,
-            "pyspark_cache": False,
-            "pyspark_keep_cache": False,
+            "cache_dataframe": False,
+            "keep_cached_dataframe": False,
         }
         assert CONFIG.dict() == expected

@@ -146,8 +146,8 @@ def test_data_only(self, spark, sample_spark_schema):
         expected = {
             "validation_enabled": True,
             "validation_depth": ValidationDepth.DATA_ONLY,
-            "pyspark_cache": False,
-            "pyspark_keep_cache": False,
+            "cache_dataframe": False,
+            "keep_cached_dataframe": False,
         }
         assert CONFIG.dict() == expected

@@ -233,8 +233,8 @@ def test_schema_and_data(self, spark, sample_spark_schema):
         expected = {
             "validation_enabled": True,
             "validation_depth": ValidationDepth.SCHEMA_AND_DATA,
-            "pyspark_cache": False,
-            "pyspark_keep_cache": False,
+            "cache_dataframe": False,
+            "keep_cached_dataframe": False,
         }
         assert CONFIG.dict() == expected

@@ -339,21 +339,21 @@ class TestSchema(DataFrameModel):
     @pytest.mark.parametrize("cache_enabled", [True, False])
     @pytest.mark.parametrize("keep_cache_enabled", [True, False])
     # pylint:disable=too-many-locals
-    def test_pyspark_cache_settings(
+    def test_cache_dataframe_settings(
         self,
         cache_enabled,
         keep_cache_enabled,
     ):
         """This function validates setters and getters for cache/keep_cache options."""
         # Set expected properties in Config object
-        CONFIG.pyspark_cache = cache_enabled
-        CONFIG.pyspark_keep_cache = keep_cache_enabled
+        CONFIG.cache_dataframe = cache_enabled
+        CONFIG.keep_cached_dataframe = keep_cache_enabled

         # Evaluate expected Config
         expected = {
             "validation_enabled": True,
             "validation_depth": ValidationDepth.SCHEMA_AND_DATA,
-            "pyspark_cache": cache_enabled,
-            "pyspark_keep_cache": keep_cache_enabled,
+            "cache_dataframe": cache_enabled,
+            "keep_cached_dataframe": keep_cache_enabled,
         }
         assert CONFIG.dict() == expected
tests/pyspark/test_pyspark_decorators.py
Lines changed: 5 additions & 5 deletions

@@ -22,10 +22,10 @@ class TestPanderaDecorators:

     sample_data = [("Bread", 9), ("Cutter", 15)]

-    def test_pyspark_cache_requirements(self, spark, sample_spark_schema):
+    def test_cache_dataframe_requirements(self, spark, sample_spark_schema):
         """Validates if decorator can only be applied in a proper function."""
         # Set expected properties in Config object
-        CONFIG.pyspark_cache = True
+        CONFIG.cache_dataframe = True
         input_df = spark_df(spark, self.sample_data, sample_spark_schema)

         class FakeDataFrameSchemaBackend:
@@ -74,7 +74,7 @@ def func_wo_check_obj(self, message: str):
         )

     # pylint:disable=too-many-locals
-    def test_pyspark_cache_settings(
+    def test_cache_dataframe_settings(
         self,
         spark,
         sample_spark_schema,
@@ -86,8 +86,8 @@ def test_pyspark_cache_settings(
     ):
         """This function validates that caching/unpersisting works as expected."""
         # Set expected properties in Config object
-        CONFIG.pyspark_cache = cache_enabled
-        CONFIG.pyspark_keep_cache = keep_cache_enabled
+        CONFIG.cache_dataframe = cache_enabled
+        CONFIG.keep_cached_dataframe = keep_cache_enabled

         # Prepare test data
         input_df = spark_df(spark, self.sample_data, sample_spark_schema)