From 5d509765f2884230ffa7009c02666f7ac7626933 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Mon, 30 Jan 2023 17:45:06 -0600 Subject: [PATCH 01/14] Using regex package for match The built-in re package does not allow for global flags like "(?i)" to be anywhere but the start of a pattern string now. The package `regex` still allows this, so it is optionally used for the `match` function if available. * test added * updating whats new --- cf_xarray/accessor.py | 9 +++++++-- cf_xarray/tests/test_accessor.py | 8 ++++++++ doc/whats-new.rst | 1 + 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 940f62cd..516650d5 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -3,6 +3,11 @@ import functools import inspect import itertools + +try: + from regex import match +except ImportError: + from re import match import re import warnings from collections import ChainMap @@ -223,13 +228,13 @@ def _get_custom_criteria( if key in criteria_map: for criterion, patterns in criteria_map[key].items(): for var in obj.variables: - if re.match(patterns, obj[var].attrs.get(criterion, "")): + if match(patterns, obj[var].attrs.get(criterion, "")): results.update((var,)) # also check name specifically since not in attributes elif ( criterion == "name" and isinstance(var, str) - and re.match(patterns, var) + and match(patterns, var) ): results.update((var,)) return list(results) diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 4d97ec53..923c082a 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -1584,6 +1584,14 @@ def test_custom_criteria() -> None: ds["temperature"] = ("dim", np.arange(10)) assert_identical(ds.cf["temp"], ds["temperature"]) + # test that having a global regex expression flag later in the expression will work if + # regex is found + vocab = {"temp": {"name": "tem|(?i)temp"}} + ds = xr.Dataset() + ds["Tempblah"] = [0, 1, 2] + with cf_xarray.set_options(custom_criteria=vocab): + assert_identical(ds.cf["temp"], ds["Tempblah"]) + def test_cf_standard_name_table_version() -> None: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c4c391c8..9a13a002 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -6,6 +6,7 @@ What's New v0.7.7 (Jan 14, 2023) ===================== +- Change to ``accessor.py`` to continue supporting global flags in regular expressions that are not at start of pattern. - Fix to ``geometry.points_to_cf`` to support shapely 2.0. (:pr:`386`). By `Pascal Bourgault`_ From 61ce007fcc211f564544ef7dc155b90543818f1d Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Mon, 30 Jan 2023 17:47:31 -0600 Subject: [PATCH 02/14] Update whats-new.rst --- doc/whats-new.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9a13a002..dcab57a9 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -3,10 +3,14 @@ What's New ---------- +v0.7.8 (unreleased) +=================== + +- Change to ``accessor.py`` to continue supporting global flags in regular expressions that are not at start of pattern. (:pr:`408`). By `Kristen Thyng`_ + v0.7.7 (Jan 14, 2023) ===================== -- Change to ``accessor.py`` to continue supporting global flags in regular expressions that are not at start of pattern. - Fix to ``geometry.points_to_cf`` to support shapely 2.0. (:pr:`386`). By `Pascal Bourgault`_ From 86b911f394619c6c7b39dcf412c20fdde78c99a9 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 31 Jan 2023 10:06:42 -0600 Subject: [PATCH 03/14] updates to PR --- cf_xarray/tests/__init__.py | 1 + cf_xarray/tests/test_accessor.py | 11 ++++++++++- pyproject.toml | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/cf_xarray/tests/__init__.py b/cf_xarray/tests/__init__.py index eca25d79..3ffcafcd 100644 --- a/cf_xarray/tests/__init__.py +++ b/cf_xarray/tests/__init__.py @@ -67,3 +67,4 @@ def LooseVersion(vstring): has_scipy, requires_scipy = _importorskip("scipy") has_shapely, requires_shapely = _importorskip("shapely") has_pint, requires_pint = _importorskip("pint") +has_regex, requires_regex = _importorskip("regex") diff --git a/cf_xarray/tests/test_accessor.py b/cf_xarray/tests/test_accessor.py index 923c082a..118558b8 100644 --- a/cf_xarray/tests/test_accessor.py +++ b/cf_xarray/tests/test_accessor.py @@ -34,7 +34,13 @@ rotds, vert, ) -from . import raise_if_dask_computes, requires_cftime, requires_pint, requires_scipy +from . import ( + raise_if_dask_computes, + requires_cftime, + requires_pint, + requires_regex, + requires_scipy, +) mpl.use("Agg") @@ -1584,6 +1590,9 @@ def test_custom_criteria() -> None: ds["temperature"] = ("dim", np.arange(10)) assert_identical(ds.cf["temp"], ds["temperature"]) + +@requires_regex +def test_regex_match(): # test that having a global regex expression flag later in the expression will work if # regex is found vocab = {"temp": {"name": "tem|(?i)temp"}} diff --git a/pyproject.toml b/pyproject.toml index 5fdb5348..a4ddb772 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ dynamic = ["version"] [project.optional-dependencies] -all = ["matplotlib", "pint", "shapely"] +all = ["matplotlib", "pint", "shapely", "regex"] [project.urls] homepage = "https://cf-xarray.readthedocs.io" From 62def7ecbf201ae3f0e0390e07304dfcf673a988 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 31 Jan 2023 10:16:39 -0600 Subject: [PATCH 04/14] changed to import full modules due to mypy error --- cf_xarray/accessor.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 516650d5..d55785ff 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -5,10 +5,9 @@ import itertools try: - from regex import match + import regex except ImportError: - from re import match -import re + import re import warnings from collections import ChainMap from datetime import datetime @@ -228,13 +227,13 @@ def _get_custom_criteria( if key in criteria_map: for criterion, patterns in criteria_map[key].items(): for var in obj.variables: - if match(patterns, obj[var].attrs.get(criterion, "")): + if re.match(patterns, obj[var].attrs.get(criterion, "")): results.update((var,)) # also check name specifically since not in attributes elif ( criterion == "name" and isinstance(var, str) - and match(patterns, var) + and re.match(patterns, var) ): results.update((var,)) return list(results) From a5e01e4a45acf2a94fbf9a413a9b4aa674c85c15 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 31 Jan 2023 10:22:24 -0600 Subject: [PATCH 05/14] oops on the previous commit. hopefully better now. --- cf_xarray/accessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index d55785ff..3525ada7 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -5,7 +5,7 @@ import itertools try: - import regex + import regex as re except ImportError: import re import warnings From 4ca0b7fb92c24f080b7ec461f20a45d0c29880e0 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 31 Jan 2023 10:23:12 -0600 Subject: [PATCH 06/14] added regex to environment.yaml --- ci/environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/environment.yml b/ci/environment.yml index 8b138c6d..39a284bb 100644 --- a/ci/environment.yml +++ b/ci/environment.yml @@ -13,6 +13,7 @@ dependencies: - pandas - pint - pooch + - regex - scipy - shapely - xarray From 4ced1320a5624598b742f4a00e5e85f1af81b508 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 31 Jan 2023 10:43:45 -0600 Subject: [PATCH 07/14] updated custom-criteria.md --- doc/custom-criteria.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/doc/custom-criteria.md b/doc/custom-criteria.md index e4c8e12d..45c7a970 100644 --- a/doc/custom-criteria.md +++ b/doc/custom-criteria.md @@ -102,3 +102,17 @@ cfxr.set_options(custom_criteria=salt_criteria) ds.cf[["salinity"]] ``` + +Here is an example of a more complicated custom criteria, which requires the package `regex` to be installed since a behavior (allowing global flags like "(?i)" for matching case insensitive) was recently deprecated in the `re` package. The custom criteria, called "vocab", matches – case insensitive – to the variable alias "sea_ice_u" a variable whose name includes "sea" and "ice" and "u" but not "qc" or "status", or "sea" and "ice" and "x" and "vel" but not "qc" or "status". + +```{code-cell} +import cf_xarray as cfxr +import xarray as xr + +vocab = {"sea_ice_u": {"name": "(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*u)|(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*x)(?=.*vel)"}} +ds = xr.Dataset() +ds["sea_ice_velocity_x"] = [0,1,2] + +with cfxr.set_options(custom_criteria=vocab): + ds.cf["sea_ice_u"] +``` From b36ff245313dc593a4cae43bcef03f69d331065e Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 31 Jan 2023 10:57:00 -0600 Subject: [PATCH 08/14] Update doc/whats-new.rst Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index dcab57a9..4471ea67 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -6,7 +6,7 @@ What's New v0.7.8 (unreleased) =================== -- Change to ``accessor.py`` to continue supporting global flags in regular expressions that are not at start of pattern. (:pr:`408`). By `Kristen Thyng`_ +- Optionally use the `regex` package to continue supporting global flags in regular expressions that are not at start of pattern. (:pr:`408`). By `Kristen Thyng`_ v0.7.7 (Jan 14, 2023) ===================== From 5f788382fdf33b037d38cd6620984098ac2cd928 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 31 Jan 2023 09:58:07 -0700 Subject: [PATCH 09/14] Add ignore for mypy --- cf_xarray/accessor.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/cf_xarray/accessor.py b/cf_xarray/accessor.py index 3525ada7..cc8617d9 100644 --- a/cf_xarray/accessor.py +++ b/cf_xarray/accessor.py @@ -3,11 +3,7 @@ import functools import inspect import itertools - -try: - import regex as re -except ImportError: - import re +import re import warnings from collections import ChainMap from datetime import datetime @@ -211,6 +207,11 @@ def _get_custom_criteria( List[str], Variable name(s) in parent xarray object that matches axis, coordinate, or custom `key` """ + try: + from regex import match as regex_match + except ImportError: + from re import match as regex_match # type: ignore + if isinstance(obj, DataArray): obj = obj._to_temp_dataset() @@ -227,13 +228,13 @@ def _get_custom_criteria( if key in criteria_map: for criterion, patterns in criteria_map[key].items(): for var in obj.variables: - if re.match(patterns, obj[var].attrs.get(criterion, "")): + if regex_match(patterns, obj[var].attrs.get(criterion, "")): results.update((var,)) # also check name specifically since not in attributes elif ( criterion == "name" and isinstance(var, str) - and re.match(patterns, var) + and regex_match(patterns, var) ): results.update((var,)) return list(results) From 5468efe4cd29a0b42f519b9e3578f8456c1b594b Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 31 Jan 2023 11:14:10 -0600 Subject: [PATCH 10/14] added link to regex package in docs and regex to doc env --- ci/doc.yml | 1 + doc/custom-criteria.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/doc.yml b/ci/doc.yml index 56a435e2..4b8620ce 100644 --- a/ci/doc.yml +++ b/ci/doc.yml @@ -18,6 +18,7 @@ dependencies: - pandas - pooch - pint + - regex - furo - pip: - git+https://github.com/xarray-contrib/cf-xarray diff --git a/doc/custom-criteria.md b/doc/custom-criteria.md index 45c7a970..4ec8c3a4 100644 --- a/doc/custom-criteria.md +++ b/doc/custom-criteria.md @@ -103,7 +103,7 @@ cfxr.set_options(custom_criteria=salt_criteria) ds.cf[["salinity"]] ``` -Here is an example of a more complicated custom criteria, which requires the package `regex` to be installed since a behavior (allowing global flags like "(?i)" for matching case insensitive) was recently deprecated in the `re` package. The custom criteria, called "vocab", matches – case insensitive – to the variable alias "sea_ice_u" a variable whose name includes "sea" and "ice" and "u" but not "qc" or "status", or "sea" and "ice" and "x" and "vel" but not "qc" or "status". +Here is an example of a more complicated custom criteria, which requires the package [`regex`](https://github.com/mrabarnett/mrab-regex) to be installed since a behavior (allowing global flags like "(?i)" for matching case insensitive) was recently deprecated in the `re` package. The custom criteria, called "vocab", matches – case insensitive – to the variable alias "sea_ice_u" a variable whose name includes "sea" and "ice" and "u" but not "qc" or "status", or "sea" and "ice" and "x" and "vel" but not "qc" or "status". ```{code-cell} import cf_xarray as cfxr From 0b83091db93b56cc0c9cc73cfc4531fd4cc2ccb6 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 31 Jan 2023 11:28:12 -0600 Subject: [PATCH 11/14] example in doc was not being shown but now is --- doc/custom-criteria.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/custom-criteria.md b/doc/custom-criteria.md index 4ec8c3a4..e50a793b 100644 --- a/doc/custom-criteria.md +++ b/doc/custom-criteria.md @@ -114,5 +114,6 @@ ds = xr.Dataset() ds["sea_ice_velocity_x"] = [0,1,2] with cfxr.set_options(custom_criteria=vocab): - ds.cf["sea_ice_u"] + seaiceu = ds.cf["sea_ice_u"] +seaiceu ``` From 965afb75292ae3f9492b86dd4f9af6a3a6800b1d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 31 Jan 2023 10:37:11 -0700 Subject: [PATCH 12/14] Update doc/custom-criteria.md --- doc/custom-criteria.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/custom-criteria.md b/doc/custom-criteria.md index e50a793b..f6daa08d 100644 --- a/doc/custom-criteria.md +++ b/doc/custom-criteria.md @@ -103,6 +103,8 @@ cfxr.set_options(custom_criteria=salt_criteria) ds.cf[["salinity"]] ``` +## More complex matches with `regex` + Here is an example of a more complicated custom criteria, which requires the package [`regex`](https://github.com/mrabarnett/mrab-regex) to be installed since a behavior (allowing global flags like "(?i)" for matching case insensitive) was recently deprecated in the `re` package. The custom criteria, called "vocab", matches – case insensitive – to the variable alias "sea_ice_u" a variable whose name includes "sea" and "ice" and "u" but not "qc" or "status", or "sea" and "ice" and "x" and "vel" but not "qc" or "status". ```{code-cell} From 1e748a7f03e6710c11598e125fe72ca04a394f70 Mon Sep 17 00:00:00 2001 From: Kristen Thyng Date: Tue, 31 Jan 2023 11:39:58 -0600 Subject: [PATCH 13/14] Update doc/custom-criteria.md Co-authored-by: Deepak Cherian --- doc/custom-criteria.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/custom-criteria.md b/doc/custom-criteria.md index f6daa08d..c6e30d05 100644 --- a/doc/custom-criteria.md +++ b/doc/custom-criteria.md @@ -105,6 +105,8 @@ ds.cf[["salinity"]] ## More complex matches with `regex` +## More complex matches with `regex` + Here is an example of a more complicated custom criteria, which requires the package [`regex`](https://github.com/mrabarnett/mrab-regex) to be installed since a behavior (allowing global flags like "(?i)" for matching case insensitive) was recently deprecated in the `re` package. The custom criteria, called "vocab", matches – case insensitive – to the variable alias "sea_ice_u" a variable whose name includes "sea" and "ice" and "u" but not "qc" or "status", or "sea" and "ice" and "x" and "vel" but not "qc" or "status". ```{code-cell} From c7566270bb88136d9f0b3d0208f774c75dee7d43 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 31 Jan 2023 10:40:52 -0700 Subject: [PATCH 14/14] Update doc/custom-criteria.md --- doc/custom-criteria.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/custom-criteria.md b/doc/custom-criteria.md index c6e30d05..f6daa08d 100644 --- a/doc/custom-criteria.md +++ b/doc/custom-criteria.md @@ -105,8 +105,6 @@ ds.cf[["salinity"]] ## More complex matches with `regex` -## More complex matches with `regex` - Here is an example of a more complicated custom criteria, which requires the package [`regex`](https://github.com/mrabarnett/mrab-regex) to be installed since a behavior (allowing global flags like "(?i)" for matching case insensitive) was recently deprecated in the `re` package. The custom criteria, called "vocab", matches – case insensitive – to the variable alias "sea_ice_u" a variable whose name includes "sea" and "ice" and "u" but not "qc" or "status", or "sea" and "ice" and "x" and "vel" but not "qc" or "status". ```{code-cell}