Skip to content

Commit 7477fd1

Browse files
OstheerMathijs Verhaeghpre-commit-ci[bot]headtr1ck
authored
Per-variable specification of boolean parameters in open_dataset (#9218)
* allow per-variable choice of mask_and_scale in open_dataset * simplify docstring datatype * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * dict -> Mapping in type annotation Co-authored-by: Michael Niklas <[email protected]> * use typevar for _item_or_default annotation Otherwise you lose all typing when you use that because it returns Any. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * implement feature for 4 additional parameters * fix default value inconsistency * add what's new + None annotation * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * _item_or_default return type T | None * remove default value of the `default` parameter in _item_or_default * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * docstring dtype naming --------- Co-authored-by: Mathijs Verhaegh <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Michael Niklas <[email protected]>
1 parent 076c0c2 commit 7477fd1

File tree

3 files changed

+47
-24
lines changed

3 files changed

+47
-24
lines changed

doc/whats-new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ v2024.06.1 (unreleased)
2222

2323
New Features
2424
~~~~~~~~~~~~
25+
- Allow per-variable specification of ``mask_and_scale``, ``decode_times``, ``decode_timedelta``,
26+
``use_cftime`` and ``concat_characters`` params in :py:func:`~xarray.open_dataset` (:pull:`9218`).
27+
By `Mathijs Verhaegh <https://github.com/Ostheer>`_.
2528
- Allow chunking for arrays with duplicated dimension names (:issue:`8759`, :pull:`9099`).
2629
By `Martin Raspaud <https://github.com/mraspaud>`_.
2730
- Extract the source url from fsspec objects (:issue:`9142`, :pull:`8923`).

xarray/backends/api.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -398,11 +398,11 @@ def open_dataset(
398398
chunks: T_Chunks = None,
399399
cache: bool | None = None,
400400
decode_cf: bool | None = None,
401-
mask_and_scale: bool | None = None,
402-
decode_times: bool | None = None,
403-
decode_timedelta: bool | None = None,
404-
use_cftime: bool | None = None,
405-
concat_characters: bool | None = None,
401+
mask_and_scale: bool | Mapping[str, bool] | None = None,
402+
decode_times: bool | Mapping[str, bool] | None = None,
403+
decode_timedelta: bool | Mapping[str, bool] | None = None,
404+
use_cftime: bool | Mapping[str, bool] | None = None,
405+
concat_characters: bool | Mapping[str, bool] | None = None,
406406
decode_coords: Literal["coordinates", "all"] | bool | None = None,
407407
drop_variables: str | Iterable[str] | None = None,
408408
inline_array: bool = False,
@@ -451,25 +451,31 @@ def open_dataset(
451451
decode_cf : bool, optional
452452
Whether to decode these variables, assuming they were saved according
453453
to CF conventions.
454-
mask_and_scale : bool, optional
454+
mask_and_scale : bool or dict-like, optional
455455
If True, replace array values equal to `_FillValue` with NA and scale
456456
values according to the formula `original_values * scale_factor +
457457
add_offset`, where `_FillValue`, `scale_factor` and `add_offset` are
458458
taken from variable attributes (if they exist). If the `_FillValue` or
459459
`missing_value` attribute contains multiple values a warning will be
460460
issued and all array values matching one of the multiple values will
461-
be replaced by NA. This keyword may not be supported by all the backends.
462-
decode_times : bool, optional
461+
be replaced by NA. Pass a mapping, e.g. ``{"my_variable": False}``,
462+
to toggle this feature per-variable individually.
463+
This keyword may not be supported by all the backends.
464+
decode_times : bool or dict-like, optional
463465
If True, decode times encoded in the standard NetCDF datetime format
464466
into datetime objects. Otherwise, leave them encoded as numbers.
467+
Pass a mapping, e.g. ``{"my_variable": False}``,
468+
to toggle this feature per-variable individually.
465469
This keyword may not be supported by all the backends.
466-
decode_timedelta : bool, optional
470+
decode_timedelta : bool or dict-like, optional
467471
If True, decode variables and coordinates with time units in
468472
{"days", "hours", "minutes", "seconds", "milliseconds", "microseconds"}
469473
into timedelta objects. If False, leave them encoded as numbers.
470474
If None (default), assume the same value of ``decode_times``.
475+
Pass a mapping, e.g. ``{"my_variable": False}``,
476+
to toggle this feature per-variable individually.
471477
This keyword may not be supported by all the backends.
472-
use_cftime: bool, optional
478+
use_cftime : bool or dict-like, optional
473479
Only relevant if encoded dates come from a standard calendar
474480
(e.g. "gregorian", "proleptic_gregorian", "standard", or not
475481
specified). If None (default), attempt to decode times to
@@ -478,12 +484,16 @@ def open_dataset(
478484
``cftime.datetime`` objects, regardless of whether or not they can be
479485
represented using ``np.datetime64[ns]`` objects. If False, always
480486
decode times to ``np.datetime64[ns]`` objects; if this is not possible
481-
raise an error. This keyword may not be supported by all the backends.
482-
concat_characters : bool, optional
487+
raise an error. Pass a mapping, e.g. ``{"my_variable": False}``,
488+
to toggle this feature per-variable individually.
489+
This keyword may not be supported by all the backends.
490+
concat_characters : bool or dict-like, optional
483491
If True, concatenate along the last dimension of character arrays to
484492
form string arrays. Dimensions will only be concatenated over (and
485493
removed) if they have no corresponding variable and if they are only
486494
used as the last dimension of character arrays.
495+
Pass a mapping, e.g. ``{"my_variable": False}``,
496+
to toggle this feature per-variable individually.
487497
This keyword may not be supported by all the backends.
488498
decode_coords : bool or {"coordinates", "all"}, optional
489499
Controls which variables are set as coordinate variables:

xarray/conventions.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from collections import defaultdict
44
from collections.abc import Hashable, Iterable, Mapping, MutableMapping
5-
from typing import TYPE_CHECKING, Any, Literal, Union
5+
from typing import TYPE_CHECKING, Any, Literal, TypeVar, Union
66

77
import numpy as np
88
import pandas as pd
@@ -384,16 +384,26 @@ def _update_bounds_encoding(variables: T_Variables) -> None:
384384
bounds_encoding.setdefault("calendar", encoding["calendar"])
385385

386386

387+
T = TypeVar("T")
388+
389+
390+
def _item_or_default(obj: Mapping[Any, T] | T, key: Hashable, default: T) -> T:
391+
"""
392+
Return ``obj[key]`` if obj is a mapping containing key, ``default`` if obj is a mapping without key, and obj itself if it is not a mapping.
393+
"""
394+
return obj.get(key, default) if isinstance(obj, Mapping) else obj
395+
396+
387397
def decode_cf_variables(
388398
variables: T_Variables,
389399
attributes: T_Attrs,
390-
concat_characters: bool = True,
391-
mask_and_scale: bool = True,
392-
decode_times: bool = True,
400+
concat_characters: bool | Mapping[str, bool] = True,
401+
mask_and_scale: bool | Mapping[str, bool] = True,
402+
decode_times: bool | Mapping[str, bool] = True,
393403
decode_coords: bool | Literal["coordinates", "all"] = True,
394404
drop_variables: T_DropVariables = None,
395-
use_cftime: bool | None = None,
396-
decode_timedelta: bool | None = None,
405+
use_cftime: bool | Mapping[str, bool] | None = None,
406+
decode_timedelta: bool | Mapping[str, bool] | None = None,
397407
) -> tuple[T_Variables, T_Attrs, set[Hashable]]:
398408
"""
399409
Decode several CF encoded variables.
@@ -431,7 +441,7 @@ def stackable(dim: Hashable) -> bool:
431441
if k in drop_variables:
432442
continue
433443
stack_char_dim = (
434-
concat_characters
444+
_item_or_default(concat_characters, k, True)
435445
and v.dtype == "S1"
436446
and v.ndim > 0
437447
and stackable(v.dims[-1])
@@ -440,12 +450,12 @@ def stackable(dim: Hashable) -> bool:
440450
new_vars[k] = decode_cf_variable(
441451
k,
442452
v,
443-
concat_characters=concat_characters,
444-
mask_and_scale=mask_and_scale,
445-
decode_times=decode_times,
453+
concat_characters=_item_or_default(concat_characters, k, True),
454+
mask_and_scale=_item_or_default(mask_and_scale, k, True),
455+
decode_times=_item_or_default(decode_times, k, True),
446456
stack_char_dim=stack_char_dim,
447-
use_cftime=use_cftime,
448-
decode_timedelta=decode_timedelta,
457+
use_cftime=_item_or_default(use_cftime, k, None),
458+
decode_timedelta=_item_or_default(decode_timedelta, k, None),
449459
)
450460
except Exception as e:
451461
raise type(e)(f"Failed to decode variable {k!r}: {e}") from e

0 commit comments

Comments
 (0)