Skip to content

Commit 6898d38

Browse files
committed
Refactor checks, update plugins and configs, improve docs
1 parent c28e060 commit 6898d38

30 files changed

+829
-247
lines changed

README.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,20 @@ $ pip install cc-plugin-wcrp
1313
```
1414
See the [**IOOS/compliance-checker**](https://github.com/ioos/compliance-checker#installation) for additional Installation notes.
1515

16-
And then install Esgvoc and universe to get the Controlled Vocabulary :
16+
Then install esgvoc and the required projects to get the Controlled Vocabulary:
1717

1818
```shell
19-
$ esgvoc config set universe:branch=esgvoc_dev
2019
$ esgvoc config add cordex-cmip6
20+
$ esgvoc config add cmip7
2121
$ esgvoc install
2222
```
23+
You should verify with `esgvoc config show` that all projects are on the `esgvoc` branch.
2324

25+
If not, run:
26+
```bash
27+
esgvoc config set project:branch=esgvoc
28+
esgvoc install
29+
```
2430
## Usage
2531

2632
```shell

checks/attribute_checks/check_attribute_suite.py

Lines changed: 52 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ def check_attribute_suite(
6262
severity: int,
6363
value_type: Optional[str] = None,
6464
is_required: bool = True,
65+
na_value: Optional[Any] = None,
6566
pattern: Optional[str] = None,
6667
constant: Any = None,
6768
enum: Optional[Iterable[Any]] = None,
@@ -95,6 +96,11 @@ def check_attribute_suite(
9596
9697
The ONLY allowed combination is:
9798
cv_source_collection + cv_source_collection_key
99+
100+
na_value behavior:
101+
- if the attribute is missing -> normal ATTR001 logic
102+
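- if the attribute exists and its value equals na_value (compared as stripped, case-insensitive strings) -> return early, before the ATTR001 pass is recorded and before any further validation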
103+
- otherwise continue with normal validation
98104
"""
99105

100106
results = []
@@ -123,8 +129,6 @@ def check_attribute_suite(
123129
existence_ctx = TestCtx(severity, f"[ATTR001] {label} existence")
124130
try:
125131
attr_value = obj.getncattr(nc_key)
126-
existence_ctx.add_pass()
127-
results.append(existence_ctx.to_result())
128132
except AttributeError:
129133
if is_required:
130134
existence_ctx.add_failure(
@@ -133,6 +137,16 @@ def check_attribute_suite(
133137
results.append(existence_ctx.to_result())
134138
return results # stop here if missing
135139

140+
# -------------------------------------------------------------------------
141+
# Short-circuit on sentinel / not-applicable value
142+
# -------------------------------------------------------------------------
143+
if na_value is not None:
144+
if str(attr_value).strip().lower() == str(na_value).strip().lower():
145+
return results
146+
147+
existence_ctx.add_pass()
148+
results.append(existence_ctx.to_result())
149+
136150
# -------------------------------------------------------------------------
137151
# ATTR002 - Type check
138152
# -------------------------------------------------------------------------
@@ -147,6 +161,8 @@ def check_attribute_suite(
147161
"str": str,
148162
"int": (int, np.integer),
149163
"float": (float, np.floating),
164+
"double": np.float64,
165+
"simple": np.float32,
150166
"bool": (bool, np.bool_),
151167
"str_array": list,
152168
}
@@ -310,22 +326,48 @@ def check_attribute_suite(
310326
invalid: list[Any] = []
311327

312328
try:
313-
# If a key is provided, do the direct get_term_in_collection (as requested).
314-
# This checks that the collection contains that term_id key.
315329
if cv_source_collection_key:
316-
term = voc.get_term_in_collection(
330+
terms = voc.get_all_terms_in_collection(
317331
project_id=project_name,
318332
collection_id=cv_source_collection,
319-
term_id=cv_source_collection_key,
320333
)
321-
if not term:
334+
335+
if not terms:
322336
ctx.add_failure(
323-
f"CV collection '{cv_source_collection}' has no term key '{cv_source_collection_key}'."
337+
f"CV collection '{cv_source_collection}' is empty or could not be retrieved."
324338
)
325339
results.append(ctx.to_result())
326340
return results
327341

328-
# Then validate the actual attribute value against the collection
342+
for val in values:
343+
found = False
344+
val_norm = " ".join(str(val).strip().lower().split())
345+
346+
for term in terms:
347+
candidate = getattr(term, str(cv_source_collection_key), None)
348+
if candidate is None:
349+
continue
350+
351+
candidate_norm = " ".join(str(candidate).strip().lower().split())
352+
353+
if val_norm == candidate_norm or val_norm in candidate_norm:
354+
found = True
355+
break
356+
357+
if not found:
358+
invalid.append(val)
359+
360+
if invalid:
361+
ctx.add_failure(
362+
f"Value(s) {invalid} not found in field '{cv_source_collection_key}' "
363+
f"of any term in CV collection '{cv_source_collection}'."
364+
)
365+
else:
366+
ctx.add_pass()
367+
368+
results.append(ctx.to_result())
369+
return results
370+
329371
for val in values:
330372
if not voc.valid_term_in_collection(
331373
value=val,
@@ -348,4 +390,4 @@ def check_attribute_suite(
348390
return results
349391

350392
# If no ATTR004 rule configured, we simply return existence/type/utf8 results.
351-
return results
393+
return results

checks/consistency_checks/check_attributes_match_filename.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from compliance_checker.base import TestCtx
55
from checks.utils import _parse_filename_components
66

7-
# CMIP6: time_range exists in filename, not a GA => parse but don't compare
7+
# CMIP6
88
_FILENAME_KEYS_CMIP6_PARSE = [
99
"variable_id",
1010
"table_id",
@@ -23,7 +23,7 @@
2323
"grid_label",
2424
]
2525

26-
# CMIP7: time_range exists in filename, not a GA => parse but don't compare
26+
# CMIP7
2727
_FILENAME_KEYS_CMIP7_PARSE = [
2828
"variable_id",
2929
"branding_suffix",
@@ -77,13 +77,6 @@ def _parse_cmip7_filename(filename: str):
7777

7878

7979
def _unwrap_facets(maybe_tuple):
80-
"""
81-
In this repo, checks.utils._parse_filename_components may return:
82-
- dict
83-
- None
84-
- (dict, extra) <-- this is what broke CMIP6
85-
We only need the dict.
86-
"""
8780
if isinstance(maybe_tuple, tuple) and len(maybe_tuple) > 0 and isinstance(maybe_tuple[0], dict):
8881
return maybe_tuple[0]
8982
return maybe_tuple
@@ -96,7 +89,6 @@ def check_filename_vs_global_attrs(ds, severity, filename_template_keys=None):
9689
Important:
9790
- time_range is NOT a global attribute in CMIP6 or CMIP7.
9891
It's parsed from filename but not compared here.
99-
(Your VAR009 handles time-range vs data.)
10092
"""
10193
fixed_check_id = "ATTR005"
10294
description = f"[{fixed_check_id}] Consistency: Filename vs Global Attributes"
@@ -109,7 +101,7 @@ def check_filename_vs_global_attrs(ds, severity, filename_template_keys=None):
109101

110102
filename = os.path.basename(filepath)
111103

112-
# ---------------- CMIP7 branch ----------------
104+
# ---------------- CMIP7 ----------------
113105
if _is_cmip7(ds):
114106
facets = _parse_cmip7_filename(filename)
115107
if facets is None:
@@ -119,7 +111,7 @@ def check_filename_vs_global_attrs(ds, severity, filename_template_keys=None):
119111
return [ctx.to_result()]
120112
compare_keys = _FILENAME_KEYS_CMIP7_COMPARE
121113

122-
# ---------------- CMIP6 (default) branch ----------------
114+
# ---------------- CMIP6 ----------------
123115
else:
124116
parse_keys = filename_template_keys or _FILENAME_KEYS_CMIP6_PARSE
125117
facets = _parse_filename_components(filename, parse_keys)

checks/consistency_checks/check_drs_consistency.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,7 @@
66
from checks.utils import _get_drs_facets
77

88
def _unwrap_facets(x):
9-
"""
10-
Some implementations of checks.utils._get_drs_facets return tuples like:
11-
(facets_dict, extra)
129

13-
"""
1410
if isinstance(x, tuple) and len(x) > 0 and isinstance(x[0], dict):
1511
return x[0]
1612
return x
@@ -24,13 +20,13 @@ def _unwrap_facets(x):
2420
"variant_label", "grid_label", "time_range",
2521
]
2622

27-
# CMIP7 directoryStructureDD (CMIP7 doc v1.0)
23+
# CMIP7 directoryStructure
2824
_dir_template_keys_cmip7 = [
2925
"drs_specs", "mip_era", "activity_id", "institution_id", "source_id",
3026
"experiment_id", "variant_label", "region", "frequency", "variable_id",
3127
"branding_suffix", "grid_label", "directory_date",
3228
]
33-
# CMIP7 fileNameDD
29+
# CMIP7 fileName
3430
_filename_template_keys_cmip7 = [
3531
"variable_id", "branding_suffix", "frequency", "region", "grid_label",
3632
"source_id", "experiment_id", "variant_label", "time_range",
@@ -92,7 +88,7 @@ def check_attributes_match_directory_structure(
9288
return [ctx.to_result()]
9389

9490
if _is_cmip7(ds, project_id):
95-
# drs_specs is required to anchor the DRS in the filesystem
91+
9692
try:
9793
drs_specs = str(ds.getncattr("drs_specs"))
9894
except Exception:

checks/consistency_checks/check_drs_filename_cv.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
def _normalize_project_id(project_id: str) -> str:
1515
if isinstance(project_id, str) and project_id.lower() == "cmip7":
16-
return "CMIP7"
16+
return "cmip7"
1717
if isinstance(project_id, str) and project_id.lower() == "cmip6":
1818
return "cmip6"
1919
return project_id
@@ -76,7 +76,6 @@ def check_drs_directory(ds, severity, project_id="cmip6"):
7676
ctx.add_failure("File path could not be determined.")
7777
return [ctx.to_result()]
7878

79-
# If dataset declares drs_specs, prefer parsing from that directory name.
8079
drs_specs = None
8180
try:
8281
drs_specs = ds.getncattr("drs_specs")

0 commit comments

Comments
 (0)