From 587ebb812ab9fa5160146195fe38859c7d183697 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 11 Aug 2022 03:11:52 -0400 Subject: [PATCH 001/155] copied files defining strategies over to this branch --- xarray/tests/duckarrays/base/strategies.py | 161 +++++++++++++++++++++ xarray/tests/duckarrays/base/utils.py | 36 +++++ 2 files changed, 197 insertions(+) create mode 100644 xarray/tests/duckarrays/base/strategies.py create mode 100644 xarray/tests/duckarrays/base/utils.py diff --git a/xarray/tests/duckarrays/base/strategies.py b/xarray/tests/duckarrays/base/strategies.py new file mode 100644 index 00000000000..42eee29b554 --- /dev/null +++ b/xarray/tests/duckarrays/base/strategies.py @@ -0,0 +1,161 @@ +import hypothesis.extra.numpy as npst +import hypothesis.strategies as st + +import xarray as xr +from xarray.core.utils import is_dict_like + +from . import utils + +all_dtypes = ( + npst.integer_dtypes() + | npst.unsigned_integer_dtypes() + | npst.floating_dtypes() + | npst.complex_number_dtypes() +) + + +def numpy_array(shape, dtypes=None): + if dtypes is None: + dtypes = all_dtypes + + def elements(dtype): + max_value = 100 + min_value = 0 if dtype.kind == "u" else -max_value + + return npst.from_dtype( + dtype, allow_infinity=False, min_value=min_value, max_value=max_value + ) + + return dtypes.flatmap( + lambda dtype: npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype)) + ) + + +def dimension_sizes(min_dims, max_dims, min_size, max_size): + sizes = st.lists( + elements=st.tuples(st.text(min_size=1), st.integers(min_size, max_size)), + min_size=min_dims, + max_size=max_dims, + unique_by=lambda x: x[0], + ) + return sizes + + +@st.composite +def variable( + draw, + create_data, + *, + sizes=None, + min_size=1, + max_size=3, + min_dims=1, + max_dims=3, + dtypes=None, +): + if sizes is None: + sizes = draw( + dimension_sizes( + min_size=min_size, + max_size=max_size, + min_dims=min_dims, + max_dims=max_dims, + ) + ) + + if not sizes: + dims = 
() + shape = () + else: + dims, shape = zip(*sizes) + data = create_data(shape, dtypes) + + return xr.Variable(dims, draw(data)) + + +@st.composite +def data_array( + draw, create_data, *, min_dims=1, max_dims=3, min_size=1, max_size=3, dtypes=None +): + name = draw(st.none() | st.text(min_size=1)) + if dtypes is None: + dtypes = all_dtypes + + sizes = st.lists( + elements=st.tuples(st.text(min_size=1), st.integers(min_size, max_size)), + min_size=min_dims, + max_size=max_dims, + unique_by=lambda x: x[0], + ) + drawn_sizes = draw(sizes) + dims, shape = zip(*drawn_sizes) + + data = draw(create_data(shape, dtypes)) + + return xr.DataArray( + data=data, + name=name, + dims=dims, + ) + + +@st.composite +def dataset( + draw, + create_data, + *, + min_dims=1, + max_dims=3, + min_size=1, + max_size=3, + min_vars=1, + max_vars=3, +): + dtypes = st.just(draw(all_dtypes)) + names = st.text(min_size=1) + sizes = dimension_sizes( + min_size=min_size, max_size=max_size, min_dims=min_dims, max_dims=max_dims + ) + + data_vars = sizes.flatmap( + lambda s: st.dictionaries( + keys=names.filter(lambda n: n not in dict(s)), + values=variable(create_data, sizes=s, dtypes=dtypes), + min_size=min_vars, + max_size=max_vars, + ) + ) + + return xr.Dataset(data_vars=draw(data_vars)) + + +def valid_axis(ndim): + if ndim == 0: + return st.none() | st.just(0) + return st.none() | st.integers(-ndim, ndim - 1) + + +def valid_axes(ndim): + return valid_axis(ndim) | npst.valid_tuple_axes(ndim, min_size=1) + + +def valid_dim(dims): + if not isinstance(dims, list): + dims = [dims] + + ndim = len(dims) + axis = valid_axis(ndim) + return axis.map(lambda axes: utils.valid_dims_from_axes(dims, axes)) + + +def valid_dims(dims): + if is_dict_like(dims): + dims = list(dims.keys()) + elif isinstance(dims, tuple): + dims = list(dims) + elif not isinstance(dims, list): + dims = [dims] + + ndim = len(dims) + axes = valid_axes(ndim) + return axes.map(lambda axes: utils.valid_dims_from_axes(dims, axes)) diff 
--git a/xarray/tests/duckarrays/base/utils.py b/xarray/tests/duckarrays/base/utils.py new file mode 100644 index 00000000000..2bd353e2116 --- /dev/null +++ b/xarray/tests/duckarrays/base/utils.py @@ -0,0 +1,36 @@ +import warnings +from contextlib import contextmanager + + +@contextmanager +def suppress_warning(category, message=""): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=category, message=message) + + yield + + +def create_dimension_names(ndim): + return [f"dim_{n}" for n in range(ndim)] + + +def valid_dims_from_axes(dims, axes): + if axes is None: + return None + + if axes == 0 and len(dims) == 0: + return None + + if isinstance(axes, int): + return dims[axes] + + return [dims[axis] for axis in axes] + + +def valid_axes_from_dims(all_dims, dims): + if dims is None: + return None + elif isinstance(dims, list): + return [all_dims.index(dim) for dim in dims] + else: + return all_dims.index(dims) From acbfa69e9a7d49ce8379d912775f0b7e3619cdc0 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 11 Aug 2022 03:26:57 -0400 Subject: [PATCH 002/155] placed testing functions in their own directory --- xarray/testing/__init__.py | 23 +++++++++++++++++++++++ xarray/{ => testing}/testing.py | 9 --------- 2 files changed, 23 insertions(+), 9 deletions(-) create mode 100644 xarray/testing/__init__.py rename xarray/{ => testing}/testing.py (98%) diff --git a/xarray/testing/__init__.py b/xarray/testing/__init__.py new file mode 100644 index 00000000000..c9d1f9b1790 --- /dev/null +++ b/xarray/testing/__init__.py @@ -0,0 +1,23 @@ +from .testing import ( # noqa: F401 + _assert_dataarray_invariants, + _assert_dataset_invariants, + _assert_indexes_invariants_checks, + _assert_internal_invariants, + _assert_variable_invariants, + _data_allclose_or_equiv, + assert_allclose, + assert_chunks_equal, + assert_duckarray_allclose, + assert_duckarray_equal, + assert_equal, + assert_identical, +) + +__all__ = [ + "assert_allclose", + 
"assert_chunks_equal", + "assert_duckarray_equal", + "assert_duckarray_allclose", + "assert_equal", + "assert_identical", +] diff --git a/xarray/testing.py b/xarray/testing/testing.py similarity index 98% rename from xarray/testing.py rename to xarray/testing/testing.py index 59737e1d23e..89f701e2ba4 100644 --- a/xarray/testing.py +++ b/xarray/testing/testing.py @@ -12,15 +12,6 @@ from xarray.core.indexes import Index, PandasIndex, PandasMultiIndex, default_indexes from xarray.core.variable import IndexVariable, Variable -__all__ = ( - "assert_allclose", - "assert_chunks_equal", - "assert_duckarray_equal", - "assert_duckarray_allclose", - "assert_equal", - "assert_identical", -) - def ensure_warnings(func): # sometimes tests elevate warnings to errors From 73d763f520ee168c02a75c4129929ac2fe152337 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 11 Aug 2022 03:35:47 -0400 Subject: [PATCH 003/155] moved hypothesis strategies into new testing directory --- xarray/{tests/duckarrays/base => testing}/strategies.py | 0 xarray/{tests/duckarrays/base => testing}/utils.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename xarray/{tests/duckarrays/base => testing}/strategies.py (100%) rename xarray/{tests/duckarrays/base => testing}/utils.py (100%) diff --git a/xarray/tests/duckarrays/base/strategies.py b/xarray/testing/strategies.py similarity index 100% rename from xarray/tests/duckarrays/base/strategies.py rename to xarray/testing/strategies.py diff --git a/xarray/tests/duckarrays/base/utils.py b/xarray/testing/utils.py similarity index 100% rename from xarray/tests/duckarrays/base/utils.py rename to xarray/testing/utils.py From db2deff82202caf3346b13a64a1c44ba558115f8 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 11 Aug 2022 04:09:10 -0400 Subject: [PATCH 004/155] begin type hinting strategies --- xarray/testing/strategies.py | 45 ++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 15 deletions(-) diff --git 
a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 42eee29b554..009dc788ccd 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,12 +1,15 @@ +from typing import Callable, List, Tuple, Union + import hypothesis.extra.numpy as npst import hypothesis.strategies as st +import numpy as np import xarray as xr from xarray.core.utils import is_dict_like from . import utils -all_dtypes = ( +all_dtypes: st.SearchStrategy[np.dtype] = ( npst.integer_dtypes() | npst.unsigned_integer_dtypes() | npst.floating_dtypes() @@ -14,7 +17,7 @@ ) -def numpy_array(shape, dtypes=None): +def numpy_array(shape, dtypes=None) -> st.SearchStrategy[np.ndarray]: if dtypes is None: dtypes = all_dtypes @@ -31,7 +34,9 @@ def elements(dtype): ) -def dimension_sizes(min_dims, max_dims, min_size, max_size): +def dimension_sizes( + min_dims, max_dims, min_size, max_size +) -> st.SearchStrategy[List[Tuple[str, int]]]: sizes = st.lists( elements=st.tuples(st.text(min_size=1), st.integers(min_size, max_size)), min_size=min_dims, @@ -43,8 +48,8 @@ def dimension_sizes(min_dims, max_dims, min_size, max_size): @st.composite def variable( - draw, - create_data, + draw: st.DrawFn, + create_data: Callable, *, sizes=None, min_size=1, @@ -52,7 +57,8 @@ def variable( min_dims=1, max_dims=3, dtypes=None, -): +) -> st.SearchStrategy[xr.Variable]: + if sizes is None: sizes = draw( dimension_sizes( @@ -75,8 +81,16 @@ def variable( @st.composite def data_array( - draw, create_data, *, min_dims=1, max_dims=3, min_size=1, max_size=3, dtypes=None -): + draw: st.DrawFn, + create_data: Callable, + *, + min_dims=1, + max_dims=3, + min_size=1, + max_size=3, + dtypes=None, +) -> st.SearchStrategy[xr.DataArray]: + name = draw(st.none() | st.text(min_size=1)) if dtypes is None: dtypes = all_dtypes @@ -101,8 +115,8 @@ def data_array( @st.composite def dataset( - draw, - create_data, + draw: st.DrawFn, + create_data: Callable, *, min_dims=1, max_dims=3, @@ -110,7 +124,8 @@ def dataset( 
max_size=3, min_vars=1, max_vars=3, -): +) -> st.SearchStrategy[xr.Dataset]: + dtypes = st.just(draw(all_dtypes)) names = st.text(min_size=1) sizes = dimension_sizes( @@ -129,17 +144,17 @@ def dataset( return xr.Dataset(data_vars=draw(data_vars)) -def valid_axis(ndim): +def valid_axis(ndim) -> st.SearchStrategy[Union[None, int]]: if ndim == 0: return st.none() | st.just(0) return st.none() | st.integers(-ndim, ndim - 1) -def valid_axes(ndim): +def valid_axes(ndim) -> st.SearchStrategy[Union[None, int, Tuple[int, ...]]]: return valid_axis(ndim) | npst.valid_tuple_axes(ndim, min_size=1) -def valid_dim(dims): +def valid_dim(dims) -> st.SearchStrategy[str]: if not isinstance(dims, list): dims = [dims] @@ -148,7 +163,7 @@ def valid_dim(dims): return axis.map(lambda axes: utils.valid_dims_from_axes(dims, axes)) -def valid_dims(dims): +def valid_dims(dims) -> st.SearchStrategy[xr.DataArray]: if is_dict_like(dims): dims = list(dims.keys()) elif isinstance(dims, tuple): From 746cfc8ed2f45bfdf2f3b4d1bc8b39b3601f0879 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 11 Aug 2022 04:26:21 -0400 Subject: [PATCH 005/155] renamed strategies for consistency with hypothesis conventions --- xarray/testing/strategies.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 009dc788ccd..d84c84d81a8 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,4 @@ -from typing import Callable, List, Tuple, Union +from typing import Any, Callable, List, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -17,18 +17,19 @@ ) +def elements(dtype) -> st.SearchStrategy[Any]: + max_value = 100 + min_value = 0 if dtype.kind == "u" else -max_value + + return npst.from_dtype( + dtype, allow_infinity=False, min_value=min_value, max_value=max_value + ) + + def numpy_array(shape, dtypes=None) -> 
st.SearchStrategy[np.ndarray]: if dtypes is None: dtypes = all_dtypes - def elements(dtype): - max_value = 100 - min_value = 0 if dtype.kind == "u" else -max_value - - return npst.from_dtype( - dtype, allow_infinity=False, min_value=min_value, max_value=max_value - ) - return dtypes.flatmap( lambda dtype: npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype)) ) @@ -47,7 +48,7 @@ def dimension_sizes( @st.composite -def variable( +def variables( draw: st.DrawFn, create_data: Callable, *, @@ -80,7 +81,7 @@ def variable( @st.composite -def data_array( +def dataarrays( draw: st.DrawFn, create_data: Callable, *, @@ -114,7 +115,7 @@ def data_array( @st.composite -def dataset( +def datasets( draw: st.DrawFn, create_data: Callable, *, @@ -135,7 +136,7 @@ def dataset( data_vars = sizes.flatmap( lambda s: st.dictionaries( keys=names.filter(lambda n: n not in dict(s)), - values=variable(create_data, sizes=s, dtypes=dtypes), + values=variables(create_data, sizes=s, dtypes=dtypes), min_size=min_vars, max_size=max_vars, ) From 03cd9debdb94b88257815e1e682d1845781219bf Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 11 Aug 2022 04:26:57 -0400 Subject: [PATCH 006/155] added strategies to public API (with experimental warning) --- doc/api.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index 11ae5de8531..86399891af3 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1060,6 +1060,19 @@ Testing testing.assert_allclose testing.assert_chunks_equal +Hypothesis Testing Strategies +============================= + +.. warning:: + These strategies should be considered highly experimental, and liable to change at any time. + +.. 
autosummary:: + :toctree: generated/ + + testing.strategies.variables + testing.strategies.dataarrays + testing.strategies.datasets + Exceptions ========== From 2fe358392b226866a5e2e0acca56dc9f37e37baa Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 11 Aug 2022 04:45:00 -0400 Subject: [PATCH 007/155] strategies for chunking patterns --- doc/api.rst | 2 + xarray/testing/strategies.py | 187 ++++++++++++++++++++++++++++++++++- 2 files changed, 188 insertions(+), 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index 86399891af3..6fc2587b253 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1072,6 +1072,8 @@ Hypothesis Testing Strategies testing.strategies.variables testing.strategies.dataarrays testing.strategies.datasets + testing.strategies.chunks + testing.strategies.chunksizes Exceptions ========== diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index d84c84d81a8..68ac140f379 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, List, Tuple, Union +from typing import Any, Callable, List, Mapping, Optional, Set, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -175,3 +175,188 @@ def valid_dims(dims) -> st.SearchStrategy[xr.DataArray]: ndim = len(dims) axes = valid_axes(ndim) return axes.map(lambda axes: utils.valid_dims_from_axes(dims, axes)) + + +@st.composite +def block_lengths( + draw: st.DrawFn, + ax_length: int, + min_chunk_length: int = 1, + max_chunk_length: Optional[int] = None, +) -> st.SearchStrategy[Tuple[int, ...]]: + """Generate different chunking patterns along one dimension of an array.""" + + chunks = [] + remaining_length = ax_length + while remaining_length > 0: + _max_chunk_length = ( + min(remaining_length, max_chunk_length) + if max_chunk_length + else remaining_length + ) + + if min_chunk_length > _max_chunk_length: + # if we are at the end of the array we have no choice but to use a smaller 
chunk + chunk = remaining_length + else: + chunk = draw( + st.integers(min_value=min_chunk_length, max_value=_max_chunk_length) + ) + + chunks.append(chunk) + remaining_length = remaining_length - chunk + + return tuple(chunks) + + +# TODO we could remove this once dask/9374 is merged upstream +@st.composite +def chunks( + draw: st.DrawFn, + shape: Tuple[int, ...], + axes: Optional[Union[int, Tuple[int, ...]]] = None, + min_chunk_length: int = 1, + max_chunk_length: Optional[int] = None, +) -> st.SearchStrategy[Tuple[Tuple[int, ...], ...]]: + """ + Generates different chunking patterns for an N-D array with a given shape. + + Returns chunking structure as a tuple of tuples of ints, with each inner tuple containing + the block lengths along one dimension of the array. + + You can limit chunking to specific axes using the `axes` kwarg, and specify minimum and + maximum block lengths. + + Requires the hypothesis package to be installed. + + Parameters + ---------- + shape : tuple of ints + Shape of the array for which you want to generate a chunking pattern. + axes : None or int or tuple of ints, optional + ... + min_chunk_length : int, default is 1 + Minimum chunk length to use along all axes. + max_chunk_length: int, optional + Maximum chunk length to use along all axes. + Default is that the chunk can be as long as the length of the array along that axis. 
+ + Examples + -------- + Chunking along all axes by default + + >>> chunks(shape=(2, 3)).example() + ((1, 1), (1, 2)) + + Chunking only along the second axis + + >>> chunks(shape=(2, 3), axis=1).example() + ((2,), (1, 1, 1)) + + Minimum size chunks of length 2 along all axes + + >>> chunks(shape=(2, 3), min_chunk_length=2).example() + ((2,), (2, 1)) + + Smallest possible chunks along all axes + + >>> chunks(shape=(2, 3), max_chunk_length=1).example() + ((1, 1), (1, 1, 1)) + + Maximum size chunks along all axes + + >>> chunks(shape=(2, 3), axes=()).example() + ((2,), (3,)) + + See Also + -------- + testing.strategies.chunks + DataArray.chunk + DataArray.chunks + """ + + if min_chunk_length < 1 or not isinstance(min_chunk_length, int): + raise ValueError("min_chunk_length must be an integer >= 1") + + if max_chunk_length: + if max_chunk_length < 1 or not isinstance(min_chunk_length, int): + raise ValueError("max_chunk_length must be an integer >= 1") + + if axes is None: + axes = tuple(range(len(shape))) + elif isinstance(axes, int): + axes = (axes,) + + chunks = [] + for axis, ax_length in enumerate(shape): + + _max_chunk_length = ( + min(max_chunk_length, ax_length) if max_chunk_length else ax_length + ) + + if axes is not None and axis in axes: + block_lengths_along_ax = draw( + block_lengths( + ax_length, + min_chunk_length=min_chunk_length, + max_chunk_length=_max_chunk_length, + ) + ) + else: + # don't chunk along this dimension + block_lengths_along_ax = (ax_length,) + + chunks.append(block_lengths_along_ax) + + return tuple(chunks) + + +@st.composite +def chunksizes( + draw: st.DrawFn, + sizes: Mapping[str, int], + dims: Set[str] = None, + min_chunk_length: int = 1, + max_chunk_length: int = None, +) -> st.SearchStrategy[Mapping[str, Tuple[int, ...]]]: + """ + Generate different chunking patterns for an xarray object with given sizes. 
+ + Returns chunking structure as a mapping of dimension names to tuples of ints, + with each tuple containing the block lengths along one dimension of the object. + + You can limit chunking to specific dimensions given by the `dim` kwarg. + + Requires the hypothesis package to be installed. + + Parameters + ---------- + sizes : mapping of dimension names to ints + Size of the object for which you want to generate a chunking pattern. + dims : set of str, optional + Dimensions to chunk along. Default is to chunk along all dimensions. + min_chunk_length : int, default is 1 + Minimum chunk length to use along all dimensions. + max_chunk_length: int, optional + Maximum chunk length to use along all dimensions. + Default is that the chunk can be as long as the length of the array along that dimension. + + See Also + -------- + testing.strategies.chunks + DataArray.chunk + DataArray.chunksizes + DataArray.sizes + """ + shape = tuple(sizes.values()) + axes = tuple(list(sizes.keys()).index(d) for d in dims) if dims else None + _chunks = draw( + chunks( + shape=shape, + axes=axes, + min_chunk_length=min_chunk_length, + max_chunk_length=max_chunk_length, + ) + ) + + return {d: c for d, c in zip(list(sizes.keys()), _chunks)} From 4db36290bd61813c50168aef8c22a36b3b029843 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 12 Aug 2022 21:31:27 -0400 Subject: [PATCH 008/155] rewrote variables strategy to have same signature as Variable constructor --- xarray/testing/strategies.py | 145 +++++++++++++++++++++++++++++------ 1 file changed, 121 insertions(+), 24 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 68ac140f379..45079e56df8 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,15 +1,18 @@ -from typing import Any, Callable, List, Mapping, Optional, Set, Tuple, Union +from typing import Any, Callable, List, Mapping, Optional, Sequence, Set, Tuple, Union +import string import hypothesis.extra.numpy 
as npst import hypothesis.strategies as st import numpy as np +from hypothesis import assume import xarray as xr from xarray.core.utils import is_dict_like from . import utils -all_dtypes: st.SearchStrategy[np.dtype] = ( +# required to exclude weirder dtypes e.g. unicode, byte_string, array, or nested dtypes. +valid_dtypes: st.SearchStrategy[np.dtype] = ( npst.integer_dtypes() | npst.unsigned_integer_dtypes() | npst.floating_dtypes() @@ -47,37 +50,131 @@ def dimension_sizes( return sizes +@st.composite +def np_arrays( + draw: st.DrawFn, + shape: Union[Tuple[int], st.SearchStrategy[Tuple[int]]] = None, + dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = None, +) -> st.SearchStrategy[np.ndarray]: + """ + Generates arbitrary numpy arrays with xarray-compatible dtypes. + + Parameters + ---------- + shape + dtype + Default is to use any of the valid_dtypes defined for xarray. + """ + if shape is None: + shape = draw(npst.array_shapes()) + elif isinstance(shape, st.SearchStrategy): + shape = draw(shape) + + if dtype is None: + dtype = draw(valid_dtypes) + elif isinstance(dtype, st.SearchStrategy): + dtype = draw(dtype) + + return draw(npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype))) + + +def dimension_names( + min_ndims: int = 0, + max_ndims: int = 3, +) -> st.SearchStrategy[List[str]]: + """ + Generates arbitrary lists of valid dimension names. + """ + + return st.lists( + elements=st.text(alphabet=string.ascii_lowercase, min_size=1, max_size=5), + min_size=min_ndims, + max_size=max_ndims, + unique=True, + ) + + +# Is there a way to do this in general? +# Could make a Protocol... 
+T_Array = Any + + @st.composite def variables( draw: st.DrawFn, - create_data: Callable, - *, - sizes=None, - min_size=1, - max_size=3, - min_dims=1, - max_dims=3, - dtypes=None, + dims: Union[Sequence[str], st.SearchStrategy[str]] = None, + data: Union[T_Array, st.SearchStrategy[T_Array], None] = None, + attrs=None, + convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.Variable]: + """ + Generates arbitrary xarray.Variable objects. - if sizes is None: - sizes = draw( - dimension_sizes( - min_size=min_size, - max_size=max_size, - min_dims=min_dims, - max_dims=max_dims, - ) + Follows the signature of the xarray.Variable constructor, but you can also pass alternative strategies to generate + either numpy-like array data or dimension names. Passing both at once is forbidden. + + Passing nothing will generate a completely arbitrary Variable (backed by a numpy array). + + Parameters + ---------- + data: array-like, strategy which generates array-likes, or None + Default is to generate numpy data of arbitrary shape, values and dtype. + dims: Sequence of str, strategy which generates sequence of str, or None + Default is to generate arbitrary dimension names for each axis in data. + attrs: None + convert: Callable + Function which accepts one numpy array and returns one numpy-like array. + Default is a no-op. + """ + + if isinstance(data, st.SearchStrategy) and isinstance(dims, st.SearchStrategy): + # TODO could we relax this by adding a constraint? 
+ raise TypeError( + "Passing strategies for both dims and data could generate inconsistent contents for Variable" ) - if not sizes: - dims = () - shape = () + if data is not None and isinstance(data, st.SearchStrategy): + data = draw(data) + if dims is not None and isinstance(dims, st.SearchStrategy): + dims = draw(dims) + + print(dims) + print(data) + + if data is not None and not dims: + # no dims -> generate dims to match data + dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + + elif dims is not None and data is None: + # no data -> generate data to match dims + valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) + data = draw(np_arrays(shape=draw(valid_shapes))) + + elif data is not None and dims is not None: + # both data and dims provided -> check both are compatible + # TODO is this pointless because the xr.Variable constructor will check this anyway? + if len(dims) != data.ndim: + raise ValueError( + "Explicitly provided data must match explicitly provided dims, " + f"but len(dims) = {len(dims)} vs len(data.ndim) = {data.ndim}" + ) + else: - dims, shape = zip(*sizes) - data = create_data(shape, dtypes) + # nothing provided, so generate everything, but consistently + data = np_arrays() + # TODO this should be possible with flatmap + print(draw(data).ndim) + dims = data.flatmap( + lambda arr: dimension_names(min_ndims=arr.ndim, max_ndims=arr.ndim) + ) + # dims = draw(dimension_names()) + # assume(len(dims) == data.ndim) + + # duckarray = convert(data) - return xr.Variable(dims, draw(data)) + # print(data) + # print(dims) + return xr.Variable(dims=dims, data=data, attrs=attrs) @st.composite From 14d11aaa7cffd62f5e6fa12e0aa5bd9ce37890c8 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 12 Aug 2022 21:31:42 -0400 Subject: [PATCH 009/155] test variables strategy --- xarray/tests/test_strategies.py | 115 ++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 
xarray/tests/test_strategies.py diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py new file mode 100644 index 00000000000..2e8e926a869 --- /dev/null +++ b/xarray/tests/test_strategies.py @@ -0,0 +1,115 @@ +import pytest +import numpy as np +import numpy.testing as npt + +from hypothesis import given +import hypothesis.strategies as st +import hypothesis.extra.numpy as npst + +from xarray.testing.strategies import valid_dtypes, np_arrays, dimension_names, variables +from xarray import Dataset, DataArray +from xarray.core.variable import Variable + + +class TestNumpyArraysStrategy: + @given(np_arrays()) + def test_given_nothing(self, arr): + assert isinstance(arr, np.ndarray) + + @given(np_arrays(dtype=np.dtype("int32"))) + def test_fixed_dtype(self, arr): + assert arr.dtype == np.dtype("int32") + + @given(st.data()) + def test_arbitrary_valid_dtype(self, data): + valid_dtype = data.draw(valid_dtypes) + arr = data.draw(np_arrays(dtype=valid_dtype)) + assert arr.dtype == valid_dtype + + @given(np_arrays(shape=(2, 3))) + def test_fixed_shape(self, arr): + assert arr.shape == (2, 3) + + @given(st.data()) + def test_arbitrary_shape(self, data): + shape = data.draw(npst.array_shapes()) + arr = data.draw(np_arrays(shape=shape)) + assert arr.shape == shape + + +class TestDimensionNamesStrategy: + @given(dimension_names()) + def test_types(self, dims): + assert isinstance(dims, list) + for d in dims: + assert isinstance(d, str) + + @given(dimension_names()) + def test_unique(self, dims): + assert len(set(dims)) == len(dims) + + @given(dimension_names(min_ndims=3, max_ndims=3)) + def test_fixed_number_of_dims(self, dims): + assert isinstance(dims, list) + assert len(dims) == 3 + + +class TestVariablesStrategy: + @given(variables()) + def test_given_nothing(self, var): + assert isinstance(var, Variable) + + @given(st.data()) + def test_given_fixed_dims_and_fixed_data(self, data): + dims = ["x", "y"] + arr = np.asarray([[1, 2], [3, 4]]) + var = 
data.draw(variables(dims=dims, data=arr)) + + assert isinstance(var, Variable) + assert list(var.dims) == dims + npt.assert_equal(var.data, arr) + + with pytest.raises(ValueError): + data.draw(variables(dims=["x"], data=arr)) + + @given(st.data()) + def test_given_arbitrary_dims_and_arbitrary_data(self, data): + arr = data.draw(np_arrays()) + dims = data.draw(dimension_names()) + var = data.draw(variables(data=arr, dims=dims)) + + assert isinstance(var, Variable) + npt.assert_equal(var.data, arr) + assert var.dims == dims + + @given(st.data()) + def test_given_fixed_data(self, data): + arr = np.asarray([[1, 2], [3, 4]]) + var = data.draw(variables(data=arr)) + + assert isinstance(var, Variable) + npt.assert_equal(arr.data, arr) + + @given(st.data()) + def test_given_arbitrary_data(self, data): + arr = data.draw(np_arrays()) + var = data.draw(variables(data=arr)) + + assert isinstance(var, Variable) + npt.assert_equal(var.data, arr) + + @given(st.data()) + def test_given_fixed_dims(self, data): + dims = ["x", "y"] + var = data.draw(variables(dims=dims)) + assert isinstance(var, Variable) + assert list(var.dims) == dims + + @given(st.data()) + def test_given_arbitrary_dims(self, data): + dims = data.draw(dimension_names()) + var = data.draw(variables(dims=dims)) + + assert isinstance(var, Variable) + assert list(var.dims) == dims + From 418a359b148abf0965d3f80e9321f0f46d24c25a Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 12 Aug 2022 23:52:20 -0400 Subject: [PATCH 010/155] fixed most tests --- xarray/testing/strategies.py | 42 ++++++++++++++++----------------- xarray/tests/test_strategies.py | 27 ++++++++++++++------- 2 files changed, 39 insertions(+), 30 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 45079e56df8..344441da6fa 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,10 +1,9 @@ -from typing import Any, Callable, List, Mapping, Optional, Sequence, Set, Tuple, Union import 
string +from typing import Any, Callable, List, Mapping, Optional, Sequence, Set, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np -from hypothesis import assume import xarray as xr from xarray.core.utils import is_dict_like @@ -18,9 +17,13 @@ | npst.floating_dtypes() | npst.complex_number_dtypes() ) +valid_dtypes.__doc__ = """Generates only numpy dtypes which xarray can handle.""" def elements(dtype) -> st.SearchStrategy[Any]: + """ + Generates scalar elements to go in a numpy-like array. + """ max_value = 100 min_value = 0 if dtype.kind == "u" else -max_value @@ -84,10 +87,18 @@ def dimension_names( ) -> st.SearchStrategy[List[str]]: """ Generates arbitrary lists of valid dimension names. - """ + Parameters + ---------- + min_ndims + Minimum number of dimensions in generated list. + max_ndims + Maximum number of dimensions in generated list. + """ return st.lists( - elements=st.text(alphabet=string.ascii_lowercase, min_size=1, max_size=5), + elements=st.text( + alphabet=string.ascii_lowercase, min_size=min_ndims, max_size=max_ndims + ), min_size=min_ndims, max_size=max_ndims, unique=True, @@ -123,7 +134,7 @@ def variables( Default is to generate arbitrary dimension names for each axis in data. attrs: None convert: Callable - Function which accepts one numpy array and returns one numpy-like array. + Function which accepts one numpy array and returns one numpy-like array of the same shape. Default is a no-op. 
""" @@ -138,9 +149,6 @@ def variables( if dims is not None and isinstance(dims, st.SearchStrategy): dims = draw(dims) - print(dims) - print(data) - if data is not None and not dims: # no dims -> generate dims to match data dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) @@ -152,7 +160,7 @@ def variables( elif data is not None and dims is not None: # both data and dims provided -> check both are compatible - # TODO is this pointless because the xr.Variable constructor will check this anyway? + # sort of pointless because the xr.Variable constructor will check this anyway if len(dims) != data.ndim: raise ValueError( "Explicitly provided data must match explicitly provided dims, " @@ -161,20 +169,10 @@ def variables( else: # nothing provided, so generate everything, but consistently - data = np_arrays() - # TODO this should be possible with flatmap - print(draw(data).ndim) - dims = data.flatmap( - lambda arr: dimension_names(min_ndims=arr.ndim, max_ndims=arr.ndim) - ) - # dims = draw(dimension_names()) - # assume(len(dims) == data.ndim) - - # duckarray = convert(data) + data = draw(np_arrays()) + dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) - # print(data) - # print(dims) - return xr.Variable(dims=dims, data=data, attrs=attrs) + return xr.Variable(dims=dims, data=convert(data), attrs=attrs) @st.composite diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 2e8e926a869..56b0176608a 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -1,14 +1,18 @@ -import pytest +import hypothesis.extra.numpy as npst +import hypothesis.strategies as st import numpy as np import numpy.testing as npt +import pytest +from hypothesis import given, note -from hypothesis import given -import hypothesis.strategies as st -import hypothesis.extra.numpy as npst - -from xarray.testing.strategies import valid_dtypes, np_arrays, dimension_names, variables -from xarray import Dataset, 
DataArray +from xarray import DataArray, Dataset from xarray.core.variable import Variable +from xarray.testing.strategies import ( + dimension_names, + np_arrays, + valid_dtypes, + variables, +) class TestNumpyArraysStrategy: @@ -69,9 +73,10 @@ def test_given_fixed_dims_and_fixed_data(self, data): assert list(var.dims) == dims npt.assert_equal(var.data, arr) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="data must match"): data.draw(variables(dims=["x"], data=arr)) + @pytest.mark.xfail(reason="I don't understand why") @given(st.data()) def test_given_arbitrary_dims_and_arbitrary_data(self, data): arr = data.draw(np_arrays()) @@ -113,3 +118,9 @@ def test_given_arbitrary_dims(self, data): assert isinstance(var, Variable) assert list(var.dims) == dims + @given(st.data()) + def test_convert(self, data): + arr = data.draw(np_arrays()) + var = data.draw(variables(data=arr, convert=lambda x: x + 1)) + + npt.assert_equal(var.data, arr + 1) From c8a7d0e3741d0a289f918fd56716958fa1ef8471 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 12 Aug 2022 23:52:40 -0400 Subject: [PATCH 011/155] added helpers so far to API docs --- doc/api.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index 6fc2587b253..3bae86a5f9c 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1069,6 +1069,9 @@ Hypothesis Testing Strategies .. 
autosummary:: :toctree: generated/ + testing.strategies.valid_dtypes + testing.strategies.np_arrays + testing.strategies.dimension_names testing.strategies.variables testing.strategies.dataarrays testing.strategies.datasets From d48acebce1ff62eb125d81893308cde9c0f2f395 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 13 Aug 2022 00:11:09 -0400 Subject: [PATCH 012/155] add hypothesis to docs CI env --- ci/requirements/doc.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index 437c493c92c..249c91a256e 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -10,6 +10,7 @@ dependencies: - cfgrib>=0.9 - dask-core>=2.30 - h5netcdf>=0.7.4 + - hypothesis - ipykernel - ipython - iris>=2.3 From a20e3410844c2e05e83df51183d0d84d626f7466 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 13 Aug 2022 00:11:35 -0400 Subject: [PATCH 013/155] add todo about attrs --- xarray/testing/strategies.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 344441da6fa..9225ae0e956 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -17,7 +17,7 @@ | npst.floating_dtypes() | npst.complex_number_dtypes() ) -valid_dtypes.__doc__ = """Generates only numpy dtypes which xarray can handle.""" +valid_dtypes.__doc__ = """Generates only those numpy dtypes which xarray can handle.""" def elements(dtype) -> st.SearchStrategy[Any]: @@ -95,6 +95,7 @@ def dimension_names( max_ndims Maximum number of dimensions in generated list. 
""" + return st.lists( elements=st.text( alphabet=string.ascii_lowercase, min_size=min_ndims, max_size=max_ndims @@ -115,7 +116,7 @@ def variables( draw: st.DrawFn, dims: Union[Sequence[str], st.SearchStrategy[str]] = None, data: Union[T_Array, st.SearchStrategy[T_Array], None] = None, - attrs=None, + attrs: Union[Mapping, st.SearchStrategy[Mapping], None] = None, convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.Variable]: """ @@ -132,7 +133,7 @@ def variables( Default is to generate numpy data of arbitrary shape, values and dtype. dims: Sequence of str, strategy which generates sequence of str, or None Default is to generate arbitrary dimension names for each axis in data. - attrs: None + attrs: dict_like or strategy which generates dicts, or None, optional convert: Callable Function which accepts one numpy array and returns one numpy-like array of the same shape. Default is a no-op. @@ -172,6 +173,12 @@ def variables( data = draw(np_arrays()) dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + if isinstance(attrs, st.SearchStrategy): + attrs = draw(attrs) + elif attrs is None: + # TODO autogenerate some attributes + ... + return xr.Variable(dims=dims, data=convert(data), attrs=attrs) From 3a4816f77e70bc6a2979bf139b04ced8ba227b84 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 13 Aug 2022 12:24:26 -0400 Subject: [PATCH 014/155] draft of new user guide page on testing --- doc/api.rst | 2 + doc/user-guide/index.rst | 1 + doc/user-guide/testing.rst | 121 +++++++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+) create mode 100644 doc/user-guide/testing.rst diff --git a/doc/api.rst b/doc/api.rst index 3bae86a5f9c..38717bd964a 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1063,6 +1063,8 @@ Testing Hypothesis Testing Strategies ============================= +.. currentmodule:: xarray + .. warning:: These strategies should be considered highly experimental, and liable to change at any time. 
diff --git a/doc/user-guide/index.rst b/doc/user-guide/index.rst index 0ac25d68930..45f0ce352de 100644 --- a/doc/user-guide/index.rst +++ b/doc/user-guide/index.rst @@ -25,4 +25,5 @@ examples that describe many common tasks that you can accomplish with xarray. dask plotting options + testing duckarrays diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst new file mode 100644 index 00000000000..7b947a5b151 --- /dev/null +++ b/doc/user-guide/testing.rst @@ -0,0 +1,121 @@ +.. _testing: + +Testing your code +================= + +.. ipython:: python + :suppress: + + import numpy as np + import pandas as pd + import xarray as xr + + np.random.seed(123456) + +.. _asserts: + +Asserts +------- + +TODO + +.. _hypothesis: + +Hypothesis testing +------------------ + +.. note:: + + Testing with hypothesis is a fairly advanced topic. Before reading this section it is recommended that you take a look + at our guide to xarray's data structures, are familiar with conventional unit testing in pytest, and have seen the + hypothesis library documentation. + +``Hypothesis`` is a powerful library for property-based testing. +Instead of writing tests for one example at a time, it allows you to write tests parameterized by a source of many +dynamically generated examples. For example you might have written a test which you wish to be parameterized by the set +of all possible ``integers()``. + +Property-based testing is extremely powerful, because (unlike more conventional example-based testing) it can find bugs +that you did not even think to look for! + +Strategies +~~~~~~~~~~ + +Each source of examples is called a "strategy", and xarray provides a range of custom strategies which produce xarray +data structures containing arbitrary data. You can use these to efficiently test downstream code, +quickly ensuring that your code can handle xarray objects of all possible structures and contents. 
+ +These strategies are accessible in the :py:module::`xarray.testing.strategies` module, which provides + +.. currentmodule:: xarray + +.. autosummary:: + :toctree: generated/ + + testing.strategies.valid_dtypes + testing.strategies.np_arrays + testing.strategies.dimension_names + testing.strategies.variables + testing.strategies.dataarrays + testing.strategies.datasets + testing.strategies.chunks + testing.strategies.chunksizes + +Generating Examples +~~~~~~~~~~~~~~~~~~~ + +To see an example of what each of these strategies might produce, you can call one followed by the `.example()` method, +which is a general hypothesis method valid for all strategies + +.. ipython:: python + + import xarray.testing.strategies as xrst + + # TODO change this to dataarray once written + xrst.variables().example() + xrst.variables().example() + xrst.variables().example() + +You can see that calling `.example()` multiple times will generate different examples, giving you an idea of the wide +range of data that the xarray strategies can generate. + +# TODO simple test example + +.. ipython:: python + + import hypothesis.strategies as st + +Chaining Strategies +~~~~~~~~~~~~~~~~~~~ + +Xarray's strategies can accept other strategies as arguments, allowing you to customise the contents of the generated +examples. + +.. ipython:: python + + xrst.variables(data=xrst.np_arrays(shape=(3, 4))) + +This also works with strategies defined in other packages, for example the ``chunks`` strategy defined in +``dask.array.strategies``. + + +Fixing Arguments +~~~~~~~~~~~~~~~~ + +If you want to fix one aspect of the data structure, whilst allowing variation in the generated examples +over all other aspects, then use ``st.just()``. + +.. 
ipython:: python + :okexcept: + + # Generates only dataarrays with dimensions ["x", "y"] + xrst.dataarrays(dims=st.just(["x", "y"]))).example() + +(This is technically another example of chaining strategies - ``hypothesis.strategies.just`` is simply a special +strategy that just contains a single example.) + + +Duck-type Conversion +~~~~~~~~~~~~~~~~~~~~ + +# TODO converting to duckarrays \ No newline at end of file From d0406a2a0178c860cd8a85fae09cd0ac64674f10 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 13 Aug 2022 12:25:01 -0400 Subject: [PATCH 015/155] types for dataarrays strategy --- xarray/testing/strategies.py | 41 +++++++++++++++--------------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 9225ae0e956..6f44002ac16 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,3 +1,5 @@ +import pandas as pd + import string from typing import Any, Callable, List, Mapping, Optional, Sequence, Set, Tuple, Union @@ -114,8 +116,8 @@ def dimension_names( @st.composite def variables( draw: st.DrawFn, - dims: Union[Sequence[str], st.SearchStrategy[str]] = None, data: Union[T_Array, st.SearchStrategy[T_Array], None] = None, + dims: Union[Sequence[str], st.SearchStrategy[str]] = None, attrs: Union[Mapping, st.SearchStrategy[Mapping], None] = None, convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.Variable]: @@ -145,6 +147,8 @@ def variables( "Passing strategies for both dims and data could generate inconsistent contents for Variable" ) + # TODO remove this handling of non-strategies in favour of passing `st.just(value)` + if data is not None and isinstance(data, st.SearchStrategy): data = draw(data) if dims is not None and isinstance(dims, st.SearchStrategy): @@ -185,34 +189,23 @@ def variables( @st.composite def dataarrays( draw: st.DrawFn, - create_data: Callable, - *, - min_dims=1, - max_dims=3, - min_size=1, - 
max_size=3, - dtypes=None, + data: Union[T_Array, st.SearchStrategy[T_Array], None] = None, + coords: Union[Sequence[Union[xr.DataArray, pd.Index]], Mapping[str, xr.Variable]] = None, + dims: Union[Sequence[str], st.SearchStrategy[str]] = None, + name: str = None, + attrs: Union[Mapping, st.SearchStrategy[Mapping], None] = None, + convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.DataArray]: - name = draw(st.none() | st.text(min_size=1)) - if dtypes is None: - dtypes = all_dtypes - - sizes = st.lists( - elements=st.tuples(st.text(min_size=1), st.integers(min_size, max_size)), - min_size=min_dims, - max_size=max_dims, - unique_by=lambda x: x[0], - ) - drawn_sizes = draw(sizes) - dims, shape = zip(*drawn_sizes) - - data = draw(create_data(shape, dtypes)) + if name is None: + name = draw(st.none() | st.text(min_size=1)) return xr.DataArray( - data=data, + data=convert(data), + coords=coords, name=name, dims=dims, + attrs=attrs, ) @@ -229,7 +222,7 @@ def datasets( max_vars=3, ) -> st.SearchStrategy[xr.Dataset]: - dtypes = st.just(draw(all_dtypes)) + dtypes = st.just(draw(valid_dtypes)) names = st.text(min_size=1) sizes = dimension_sizes( min_size=min_size, max_size=max_size, min_dims=min_dims, max_dims=max_dims From 65a222d8a2b01f4b2781e9452068e2848b614f6e Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 13 Aug 2022 12:25:50 -0400 Subject: [PATCH 016/155] draft for chained chunking example --- xarray/tests/test_strategies.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 56b0176608a..6ce36cb64f6 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -3,7 +3,7 @@ import numpy as np import numpy.testing as npt import pytest -from hypothesis import given, note +from hypothesis import given from xarray import DataArray, Dataset from xarray.core.variable import Variable @@ -124,3 +124,18 @@ def 
test_convert(self, data): var = data.draw(variables(data=arr, convert=lambda x: x + 1)) npt.assert_equal(var.data, arr + 1) + + +@pytest.mark.xfail +@given(st.data()) +def test_chained_chunking_example(data): + import dask.array.strategies as dast + + def chunk(da): + return da.chunk(dast.chunks(da.shape)) + + chunked_dataarrays = xrst.dataarrays().flatmap(chunk) + + chunked_da = data.draw(chunked_dataarrays()) + + assert ... From e1d718a2d9da53cba823690addfb89c7395baffc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 13 Aug 2022 16:39:04 +0000 Subject: [PATCH 017/155] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/user-guide/testing.rst | 2 +- xarray/testing/strategies.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 7b947a5b151..d561f9b1692 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -118,4 +118,4 @@ strategy that just contains a single example.) 
Duck-type Conversion ~~~~~~~~~~~~~~~~~~~~ -# TODO converting to duckarrays \ No newline at end of file +# TODO converting to duckarrays diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 6f44002ac16..c70a1751ee7 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,11 +1,10 @@ -import pandas as pd - import string from typing import Any, Callable, List, Mapping, Optional, Sequence, Set, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np +import pandas as pd import xarray as xr from xarray.core.utils import is_dict_like @@ -190,7 +189,9 @@ def variables( def dataarrays( draw: st.DrawFn, data: Union[T_Array, st.SearchStrategy[T_Array], None] = None, - coords: Union[Sequence[Union[xr.DataArray, pd.Index]], Mapping[str, xr.Variable]] = None, + coords: Union[ + Sequence[Union[xr.DataArray, pd.Index]], Mapping[str, xr.Variable] + ] = None, dims: Union[Sequence[str], st.SearchStrategy[str]] = None, name: str = None, attrs: Union[Mapping, st.SearchStrategy[Mapping], None] = None, From 57d0f5b672a4cbe9b3f9557f78897b18cf806d38 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 14 Aug 2022 08:12:21 -0400 Subject: [PATCH 018/155] only accept strategy objects --- xarray/testing/strategies.py | 50 +++++++++++++++++---------------- xarray/tests/test_strategies.py | 34 ++++++++++------------ 2 files changed, 41 insertions(+), 43 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 6f44002ac16..9ed2f83bfc1 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,11 +1,10 @@ -import pandas as pd - import string from typing import Any, Callable, List, Mapping, Optional, Sequence, Set, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np +import pandas as pd import xarray as xr from xarray.core.utils import is_dict_like @@ -83,6 +82,10 @@ def np_arrays( 
return draw(npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype))) +names = st.text(alphabet=string.ascii_lowercase) +names.__doc__ = """Generates arbitrary string names for dimensions / variables.""" + + def dimension_names( min_ndims: int = 0, max_ndims: int = 3, @@ -99,9 +102,7 @@ def dimension_names( """ return st.lists( - elements=st.text( - alphabet=string.ascii_lowercase, min_size=min_ndims, max_size=max_ndims - ), + elements=names, min_size=min_ndims, max_size=max_ndims, unique=True, @@ -116,9 +117,9 @@ def dimension_names( @st.composite def variables( draw: st.DrawFn, - data: Union[T_Array, st.SearchStrategy[T_Array], None] = None, - dims: Union[Sequence[str], st.SearchStrategy[str]] = None, - attrs: Union[Mapping, st.SearchStrategy[Mapping], None] = None, + data: st.SearchStrategy[T_Array] = None, + dims: st.SearchStrategy[str] = None, + attrs: st.SearchStrategy[Mapping] = None, convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.Variable]: """ @@ -131,41 +132,40 @@ def variables( Parameters ---------- - data: array-like, strategy which generates array-likes, or None + data: strategy which generates array-likes, optional Default is to generate numpy data of arbitrary shape, values and dtype. - dims: Sequence of str, strategy which generates sequence of str, or None + dims: Strategy which generates sequence of strings, optional Default is to generate arbitrary dimension names for each axis in data. - attrs: dict_like or strategy which generates dicts, or None, optional + attrs: Strategy which generates dicts, optional convert: Callable Function which accepts one numpy array and returns one numpy-like array of the same shape. Default is a no-op. """ - if isinstance(data, st.SearchStrategy) and isinstance(dims, st.SearchStrategy): - # TODO could we relax this by adding a constraint? 
+ if any( + not isinstance(arg, st.SearchStrategy) and arg is not None + for arg in [data, dims, attrs] + ): raise TypeError( - "Passing strategies for both dims and data could generate inconsistent contents for Variable" + "Contents must be provided as a hypothesis.strategies.SearchStrategy object (or None)." + "To specify fixed contents, use hypothesis.strategies.just()." ) - # TODO remove this handling of non-strategies in favour of passing `st.just(value)` - - if data is not None and isinstance(data, st.SearchStrategy): - data = draw(data) - if dims is not None and isinstance(dims, st.SearchStrategy): - dims = draw(dims) - - if data is not None and not dims: + if data is not None and dims is None: # no dims -> generate dims to match data + data = draw(data) dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) elif dims is not None and data is None: # no data -> generate data to match dims + dims = draw(dims) valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) data = draw(np_arrays(shape=draw(valid_shapes))) elif data is not None and dims is not None: # both data and dims provided -> check both are compatible # sort of pointless because the xr.Variable constructor will check this anyway + data, dims = draw(data), draw(dims) if len(dims) != data.ndim: raise ValueError( "Explicitly provided data must match explicitly provided dims, " @@ -173,7 +173,7 @@ def variables( ) else: - # nothing provided, so generate everything, but consistently + # nothing provided, so generate everything consistently by drawing dims to match data data = draw(np_arrays()) dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) @@ -190,7 +190,9 @@ def variables( def dataarrays( draw: st.DrawFn, data: Union[T_Array, st.SearchStrategy[T_Array], None] = None, - coords: Union[Sequence[Union[xr.DataArray, pd.Index]], Mapping[str, xr.Variable]] = None, + coords: Union[ + Sequence[Union[xr.DataArray, pd.Index]], Mapping[str, xr.Variable] + ] 
= None, dims: Union[Sequence[str], st.SearchStrategy[str]] = None, name: str = None, attrs: Union[Mapping, st.SearchStrategy[Mapping], None] = None, diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 6ce36cb64f6..feef19c25e9 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -67,14 +67,13 @@ def test_given_nothing(self, var): def test_given_fixed_dims_and_fixed_data(self, data): dims = ["x", "y"] arr = np.asarray([[1, 2], [3, 4]]) - var = data.draw(variables(dims=dims, data=arr)) + var = data.draw(variables(dims=st.just(dims), data=st.just(arr))) - assert isinstance(var, Variable) assert list(var.dims) == dims npt.assert_equal(var.data, arr) with pytest.raises(ValueError, match="data must match"): - data.draw(variables(dims=["x"], data=arr)) + data.draw(variables(dims=st.just(["x"]), data=st.just(arr))) @pytest.mark.xfail(reason="I don't understand why") @given(st.data()) @@ -83,47 +82,44 @@ def test_given_arbitrary_dims_and_arbitrary_data(self, data): dims = data.draw(dimension_names()) var = data.draw(variables(data=arr, dims=dims)) - assert isinstance(var, Variable) npt.assert_equal(var.data, arr) assert var.dims == dims @given(st.data()) def test_given_fixed_data(self, data): arr = np.asarray([[1, 2], [3, 4]]) - var = data.draw(variables(data=arr)) + var = data.draw(variables(data=st.just(arr))) - assert isinstance(var, Variable) - npt.assert_equal(arr.data, arr) + npt.assert_equal(var.data, arr) @given(st.data()) def test_given_arbitrary_data(self, data): - arr = data.draw(np_arrays()) - var = data.draw(variables(data=arr)) + shape = (2, 3) + arrs = np_arrays(shape=shape) + var = data.draw(variables(data=arrs)) - assert isinstance(var, Variable) - npt.assert_equal(var.data, arr) + assert var.data.shape == shape @given(st.data()) def test_given_fixed_dims(self, data): dims = ["x", "y"] - var = data.draw(variables(dims=dims)) - assert isinstance(var, Variable) + var = 
data.draw(variables(dims=st.just(dims))) + assert list(var.dims) == dims @given(st.data()) def test_given_arbitrary_dims(self, data): - dims = data.draw(dimension_names()) + dims = dimension_names(min_ndims=1, max_ndims=1) var = data.draw(variables(dims=dims)) - assert isinstance(var, Variable) - assert list(var.dims) == dims + assert len(list(var.dims)) == 1 @given(st.data()) def test_convert(self, data): - arr = data.draw(np_arrays()) - var = data.draw(variables(data=arr, convert=lambda x: x + 1)) + arr = st.just(np.asarray([1, 2, 3])) + var = data.draw(variables(data=arr, convert=lambda x: x * 2)) - npt.assert_equal(var.data, arr + 1) + npt.assert_equal(var.data, np.asarray([2, 4, 6])) @pytest.mark.xfail From 82c734cc9712fe6c5404676206452115e58ebcc0 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 14 Aug 2022 08:26:52 -0400 Subject: [PATCH 019/155] fixed failure with passing in two custom strategies that must be compatible --- xarray/testing/strategies.py | 12 +++++------- xarray/tests/test_strategies.py | 13 ++++--------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 9ed2f83bfc1..60cca9cbbd6 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -5,6 +5,7 @@ import hypothesis.strategies as st import numpy as np import pandas as pd +from hypothesis import assume import xarray as xr from xarray.core.utils import is_dict_like @@ -163,14 +164,11 @@ def variables( data = draw(np_arrays(shape=draw(valid_shapes))) elif data is not None and dims is not None: - # both data and dims provided -> check both are compatible - # sort of pointless because the xr.Variable constructor will check this anyway + # both data and dims provided -> check drawn examples are compatible data, dims = draw(data), draw(dims) - if len(dims) != data.ndim: - raise ValueError( - "Explicitly provided data must match explicitly provided dims, " - f"but len(dims) = {len(dims)} vs 
len(data.ndim) = {data.ndim}" - ) + # TODO is there another way to enforce this assumption? + # TODO how do I write a test that checks that the hypothesis Unsatisfiable error will be raised? + assume(data.ndim == len(dims)) else: # nothing provided, so generate everything consistently by drawing dims to match data diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index feef19c25e9..601315f9b29 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -72,18 +72,13 @@ def test_given_fixed_dims_and_fixed_data(self, data): assert list(var.dims) == dims npt.assert_equal(var.data, arr) - with pytest.raises(ValueError, match="data must match"): - data.draw(variables(dims=st.just(["x"]), data=st.just(arr))) - - @pytest.mark.xfail(reason="I don't understand why") @given(st.data()) def test_given_arbitrary_dims_and_arbitrary_data(self, data): - arr = data.draw(np_arrays()) - dims = data.draw(dimension_names()) - var = data.draw(variables(data=arr, dims=dims)) + arrs = np_arrays(shape=(2, 3)) + dims = dimension_names(min_ndims=2) + var = data.draw(variables(data=arrs, dims=dims)) - npt.assert_equal(var.data, arr) - assert var.dims == dims + assert var.shape == (2, 3) @given(st.data()) def test_given_fixed_data(self, data): From 029f19a309c54ee64a20a86135aa8bd0468f61bd Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 14 Aug 2022 11:54:11 -0400 Subject: [PATCH 020/155] syntax error in example --- doc/user-guide/testing.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 7b947a5b151..85eda5efd01 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -109,7 +109,7 @@ over all other aspects, then use ``st.just()``. 
:okexcept: # Generates only dataarrays with dimensions ["x", "y"] - xrst.dataarrays(dims=st.just(["x", "y"]))).example() + xrst.dataarrays(dims=st.just(["x", "y"])).example() (This is technically another example of chaining strategies - ``hypothesis.strategies.just`` is simply a special strategy that just contains a single example.) @@ -118,4 +118,4 @@ strategy that just contains a single example.) Duck-type Conversion ~~~~~~~~~~~~~~~~~~~~ -# TODO converting to duckarrays \ No newline at end of file +# TODO converting to duckarrays From 46895fe3ea3bcd251f68a428ef78c7d80dc6098d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 14 Aug 2022 21:58:23 -0400 Subject: [PATCH 021/155] allow sizes dict as argument to variables --- doc/api.rst | 4 + xarray/testing/strategies.py | 153 ++++++++++++++++++++++++-------- xarray/tests/test_strategies.py | 48 +++++++++- 3 files changed, 166 insertions(+), 39 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 38717bd964a..581ab74a6f9 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1073,9 +1073,13 @@ Hypothesis Testing Strategies testing.strategies.valid_dtypes testing.strategies.np_arrays + testing.strategies.names testing.strategies.dimension_names + testing.strategies.dimension_sizes testing.strategies.variables + testing.strategies.coordinate_variables testing.strategies.dataarrays + testing.strategies.data_variables testing.strategies.datasets testing.strategies.chunks testing.strategies.chunksizes diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 60cca9cbbd6..01e62c61ecf 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -12,6 +12,19 @@ from . import utils +__all__ = [ + "valid_dtypes", + "np_arrays", + "names", + "dimension_names", + "dimension_sizes", + "variables", + "coordinate_variables", + "dataarrays", + "data_variables", + "datasets", +] + # required to exclude weirder dtypes e.g. unicode, byte_string, array, or nested dtypes. 
valid_dtypes: st.SearchStrategy[np.dtype] = ( npst.integer_dtypes() @@ -34,27 +47,6 @@ def elements(dtype) -> st.SearchStrategy[Any]: ) -def numpy_array(shape, dtypes=None) -> st.SearchStrategy[np.ndarray]: - if dtypes is None: - dtypes = all_dtypes - - return dtypes.flatmap( - lambda dtype: npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype)) - ) - - -def dimension_sizes( - min_dims, max_dims, min_size, max_size -) -> st.SearchStrategy[List[Tuple[str, int]]]: - sizes = st.lists( - elements=st.tuples(st.text(min_size=1), st.integers(min_size, max_size)), - min_size=min_dims, - max_size=max_dims, - unique_by=lambda x: x[0], - ) - return sizes - - @st.composite def np_arrays( draw: st.DrawFn, @@ -83,7 +75,7 @@ def np_arrays( return draw(npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype))) -names = st.text(alphabet=string.ascii_lowercase) +names = st.text(alphabet=string.ascii_lowercase, min_size=1) names.__doc__ = """Generates arbitrary string names for dimensions / variables.""" @@ -92,7 +84,7 @@ def dimension_names( max_ndims: int = 3, ) -> st.SearchStrategy[List[str]]: """ - Generates arbitrary lists of valid dimension names. + Generates an arbitrary list of valid dimension names. Parameters ---------- @@ -110,6 +102,42 @@ def dimension_names( ) +def dimension_sizes( + min_ndims: int = 0, + max_ndims: int = 3, + min_length=1, + max_length=None, +) -> st.SearchStrategy[Mapping[str, int]]: + """ + Generates an arbitrary mapping from dimension names to lengths. + + Parameters + ---------- + min_ndims: int, optional + Minimum number of dimensions in generated list. + Default is 1. + max_ndims: int, optional + Maximum number of dimensions in generated list. + Default is 3 + min_length: int, optional + Minimum size of a dimension. + Default is 1. + max_length: int, optional + Minimum size of a dimension. 
+ Default is `min_size + 5` + """ + + if max_length is None: + max_length = min_length + 5 + + return st.dictionaries( + keys=names, + values=st.integers(min_value=min_length, max_value=max_length), + min_size=min_ndims, + max_size=max_ndims, + ) + + # Is there a way to do this in general? # Could make a Protocol... T_Array = Any @@ -119,7 +147,9 @@ def dimension_names( def variables( draw: st.DrawFn, data: st.SearchStrategy[T_Array] = None, - dims: st.SearchStrategy[str] = None, + dims: Union[ + st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] + ] = None, attrs: st.SearchStrategy[Mapping] = None, convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.Variable]: @@ -133,7 +163,7 @@ def variables( Parameters ---------- - data: strategy which generates array-likes, optional + data: Strategy generating array-likes, optional Default is to generate numpy data of arbitrary shape, values and dtype. dims: Strategy which generates sequence of strings, optional Default is to generate arbitrary dimension names for each axis in data. @@ -160,15 +190,29 @@ def variables( elif dims is not None and data is None: # no data -> generate data to match dims dims = draw(dims) - valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) - data = draw(np_arrays(shape=draw(valid_shapes))) + if isinstance(dims, List): + valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) + data = draw(np_arrays(shape=draw(valid_shapes))) + else: + # should be a mapping of form {dim_names: lengths} + shape = tuple(dims.values()) + data = draw(np_arrays(shape=shape)) elif data is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible - data, dims = draw(data), draw(dims) - # TODO is there another way to enforce this assumption? + dims = draw(dims) + + # TODO is there another way to enforce these assumptions? 
This is very like to fail hypothesis' health checks # TODO how do I write a test that checks that the hypothesis Unsatisfiable error will be raised? - assume(data.ndim == len(dims)) + # TODO or we could just raise in this case? + if isinstance(dims, List): + data = draw(data) + assume(data.ndim == len(dims)) + else: + # should be a mapping of form {dim_names: lengths} + data = draw(data) + shape = tuple(dims.values()) + assume(data.shape == shape) else: # nothing provided, so generate everything consistently by drawing dims to match data @@ -184,21 +228,54 @@ def variables( return xr.Variable(dims=dims, data=convert(data), attrs=attrs) +def subsets_of(l: st.SearchStrategy[List[Any]]) -> st.SearchStrategy[List[Any]]: + + return st.lists(elements=st.sampled_from(l), unique=True) + + +@st.composite +def _alignable_variables( + draw: st.DrawFn, + dims: st.SearchStrategy[List[str]], +) -> st.SearchStrategy[List[xr.Variable]]: + dims = draw(subsets_of(dims)) + sizes = ... + return st.lists(variables(dims=dims)) + + +def coordinate_variables( + dims: st.SearchStrategy[List[str]], +) -> st.SearchStrategy[List[xr.Variable]]: + # TODO specifically generate dimension coordinates + return _alignable_variables(dims) + + @st.composite def dataarrays( draw: st.DrawFn, - data: Union[T_Array, st.SearchStrategy[T_Array], None] = None, + data: st.SearchStrategy[T_Array] = None, coords: Union[ Sequence[Union[xr.DataArray, pd.Index]], Mapping[str, xr.Variable] ] = None, - dims: Union[Sequence[str], st.SearchStrategy[str]] = None, - name: str = None, - attrs: Union[Mapping, st.SearchStrategy[Mapping], None] = None, + dims: st.SearchStrategy[List[str]] = None, + name: st.SearchStrategy[Union[str, None]] = None, + attrs: st.SearchStrategy[Mapping] = None, convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.DataArray]: if name is None: - name = draw(st.none() | st.text(min_size=1)) + name = draw(st.none() | names) + + if data is not None and dims is None: + 
raise NotImplementedError() + elif data is None and dims is not None: + raise NotImplementedError() + elif data is not None and dims is None: + raise NotImplementedError() + else: + data = draw(np_arrays()) + dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + coords = draw(coordinate_variables(dims=dims)) return xr.DataArray( data=convert(data), @@ -209,6 +286,12 @@ def dataarrays( ) +def data_variables( + dims: st.SearchStrategy[List[str]], +) -> st.SearchStrategy[List[xr.Variable]]: + return _alignable_variables(dims) + + @st.composite def datasets( draw: st.DrawFn, diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 601315f9b29..d3c3266e831 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -8,7 +8,9 @@ from xarray import DataArray, Dataset from xarray.core.variable import Variable from xarray.testing.strategies import ( + dataarrays, dimension_names, + dimension_sizes, np_arrays, valid_dtypes, variables, @@ -58,13 +60,27 @@ def test_fixed_number_of_dims(self, dims): assert len(dims) == 3 +class TestDimensionSizesStrategy: + @given(dimension_sizes()) + def test_types(self, dims): + assert isinstance(dims, dict) + for d, n in dims.items(): + assert isinstance(d, str) + assert isinstance(n, int) + + @given(dimension_sizes(min_ndims=3, max_ndims=3)) + def test_fixed_number_of_dims(self, dims): + assert isinstance(dims, dict) + assert len(dims) == 3 + + class TestVariablesStrategy: @given(variables()) def test_given_nothing(self, var): assert isinstance(var, Variable) @given(st.data()) - def test_given_fixed_dims_and_fixed_data(self, data): + def test_given_fixed_dims_list_and_fixed_data(self, data): dims = ["x", "y"] arr = np.asarray([[1, 2], [3, 4]]) var = data.draw(variables(dims=st.just(dims), data=st.just(arr))) @@ -73,7 +89,7 @@ def test_given_fixed_dims_and_fixed_data(self, data): npt.assert_equal(var.data, arr) @given(st.data()) - def 
test_given_arbitrary_dims_and_arbitrary_data(self, data): + def test_given_arbitrary_dims_list_and_arbitrary_data(self, data): arrs = np_arrays(shape=(2, 3)) dims = dimension_names(min_ndims=2) var = data.draw(variables(data=arrs, dims=dims)) @@ -96,19 +112,34 @@ def test_given_arbitrary_data(self, data): assert var.data.shape == shape @given(st.data()) - def test_given_fixed_dims(self, data): + def test_given_fixed_dims_list(self, data): dims = ["x", "y"] var = data.draw(variables(dims=st.just(dims))) assert list(var.dims) == dims @given(st.data()) - def test_given_arbitrary_dims(self, data): + def test_given_arbitrary_dims_list(self, data): dims = dimension_names(min_ndims=1, max_ndims=1) var = data.draw(variables(dims=dims)) assert len(list(var.dims)) == 1 + @given(st.data()) + def test_given_fixed_sizes(self, data): + dims = {"x": 3, "y": 4} + var = data.draw(variables(dims=st.just(dims))) + + assert var.dims == ("x", "y") + assert var.shape == (3, 4) + + @given(st.data()) + def test_given_fixed_sizes_and_arbitrary_data(self, data): + arrs = np_arrays(shape=(2, 3)) + var = data.draw(variables(data=arrs, dims=st.just({"x": 2, "y": 3}))) + + assert var.shape == (2, 3) + @given(st.data()) def test_convert(self, data): arr = st.just(np.asarray([1, 2, 3])) @@ -117,6 +148,15 @@ def test_convert(self, data): npt.assert_equal(var.data, np.asarray([2, 4, 6])) +@pytest.mark.xfail +class TestDataArraysStrategy: + @given(dataarrays()) + def test_given_nothing(self, da): + print(da) + assert isinstance(da, DataArray) + assert False + + @pytest.mark.xfail @given(st.data()) def test_chained_chunking_example(data): From 50c62e9ffdf9f6f03db05b54caf60259b5ef91c3 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sun, 14 Aug 2022 23:12:20 -0400 Subject: [PATCH 022/155] copied subsequences_of strategy --- xarray/testing/strategies.py | 64 ++++++++++++++++++++++--- xarray/tests/test_strategies.py | 83 +++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+), 7 
deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 01e62c61ecf..fcebd06f75b 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,11 +1,23 @@ import string -from typing import Any, Callable, List, Mapping, Optional, Sequence, Set, Tuple, Union +from typing import ( + Any, + Callable, + List, + Mapping, + Optional, + Sequence, + Set, + Tuple, + TypeVar, + Union, +) import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np import pandas as pd from hypothesis import assume +from hypothesis.internal.validation import check_valid_sizes import xarray as xr from xarray.core.utils import is_dict_like @@ -228,18 +240,54 @@ def variables( return xr.Variable(dims=dims, data=convert(data), attrs=attrs) -def subsets_of(l: st.SearchStrategy[List[Any]]) -> st.SearchStrategy[List[Any]]: +El = TypeVar("El") + + +# All from the unfinished PR https://github.com/HypothesisWorks/hypothesis/pull/1533 +# TODO Should move this function upstream by opening new PR +@st.composite +def subsequences_of( + draw: st.DrawFn, + elements: Sequence[El], + min_size: int = 0, + max_size: int = None, +) -> st.SearchStrategy[Sequence[El]]: + """ + Returns a strategy which generates sub-sequences of the input sequence. + + Order is guaranteed to be preserved in the result. + + Parameters + ---------- + elements: Elements from which to construct the subsequence + min_size: int + Minimum size of the returned subsequences. + Default is 0. + max_size: int, optional + Maximum size of the returned subsequences. + Default is the full size of the input sequence. 
+ """ + if max_size is None: + max_size = len(elements) + check_valid_sizes(min_size, max_size) + + def element_mask() -> List[bool]: + num_include = draw(st.integers(min_size, max_size)) + num_exclude = len(elements) - num_include + choices = [True] * num_include + [False] * num_exclude + assert len(elements) == len(choices) + return draw(st.permutations(choices)) - return st.lists(elements=st.sampled_from(l), unique=True) + element_includes = zip(elements, element_mask()) + return sorted(element for element, include in element_includes if include) @st.composite def _alignable_variables( draw: st.DrawFn, - dims: st.SearchStrategy[List[str]], + dim_sizes: st.SearchStrategy[Mapping[str, int]], ) -> st.SearchStrategy[List[xr.Variable]]: - dims = draw(subsets_of(dims)) - sizes = ... + dims = draw(subsequences_of(dim_sizes)) return st.lists(variables(dims=dims)) @@ -257,7 +305,9 @@ def dataarrays( coords: Union[ Sequence[Union[xr.DataArray, pd.Index]], Mapping[str, xr.Variable] ] = None, - dims: st.SearchStrategy[List[str]] = None, + dims: Union[ + st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] + ] = None, name: st.SearchStrategy[Union[str, None]] = None, attrs: st.SearchStrategy[Mapping] = None, convert: Callable[[np.ndarray], T_Array] = lambda a: a, diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index d3c3266e831..bf382919442 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -12,6 +12,7 @@ dimension_names, dimension_sizes, np_arrays, + subsequences_of, valid_dtypes, variables, ) @@ -148,6 +149,88 @@ def test_convert(self, data): npt.assert_equal(var.data, np.asarray([2, 4, 6])) +# All from the unfinished PR https://github.com/HypothesisWorks/hypothesis/pull/1533 +class TestSubsequencesOfStrategy: + @pytest.mark.xfail( + reason="Can't work out how to import assert_no_examples from hypothesis.tests.common.debug" + ) + def test_subsequence_of_empty(self): + sub_seq_strat = 
st.lists(st.none(), max_size=0) + assert_no_examples(sub_seq_strat) + + @given(st.data(), st.lists(st.integers())) + def test_subsequence_sizing(self, data, seq): + sub_seq_strat = subsequences_of(seq) + sub_seq = data.draw(sub_seq_strat) + + assert isinstance(sub_seq, list) + assert len(sub_seq) <= len(seq) + + @given(st.data(), st.lists(st.integers())) + def test_subsequence_only_original_elements(self, data, seq): + sub_seq_strat = subsequences_of(seq) + sub_seq = data.draw(sub_seq_strat) + + assert isinstance(sub_seq, list) + assert len(sub_seq) <= len(seq) + + @given(st.data(), st.lists(st.integers())) + def test_subsequence_elements_not_over_drawn(self, data, seq): + sub_seq_strat = subsequences_of(seq) + sub_seq = data.draw(sub_seq_strat) + + assert not (set(sub_seq) - set(seq)) + + @given(st.data(), st.lists(st.integers())) + def test_subsequence_original_elements_not_over_produced(self, data, seq): + sub_seq_strat = subsequences_of(seq) + sub_seq = data.draw(sub_seq_strat) + + # Per unique item, check that they don't occur in the subsequence + # more times that they appear in the source. 
+ for item in set(sub_seq): + assert sub_seq.count(item) <= seq.count(item) + + @given(st.data(), st.lists(st.integers())) + def test_subsequence_max_size_constraint(self, data, seq): + max_size_strat = st.integers(min_value=0, max_value=len(seq)) + max_size = data.draw(max_size_strat) + + sub_seq_strat = subsequences_of(seq, max_size=max_size) + sub_seq = data.draw(sub_seq_strat) + + assert len(sub_seq) <= max_size + + @given(st.data(), st.lists(st.integers())) + def test_subsequence_min_size_constraint(self, data, seq): + min_size_strat = st.integers(min_value=0, max_value=len(seq)) + min_size = data.draw(min_size_strat) + + sub_seq_strat = subsequences_of(seq, min_size=min_size) + sub_seq = data.draw(sub_seq_strat) + + assert len(sub_seq) >= min_size + + @given(st.data(), st.lists(st.integers())) + def test_subsequence_min_max_size_constraint(self, data, seq): + min_size_strat = st.integers(min_value=0, max_value=len(seq)) + min_size = data.draw(min_size_strat) + + max_size_strat = st.integers(min_value=min_size, max_value=len(seq)) + max_size = data.draw(max_size_strat) + + sub_seq_strat = subsequences_of(seq, min_size=min_size, max_size=max_size) + sub_seq = data.draw(sub_seq_strat) + + assert min_size <= len(sub_seq) <= max_size + + # this is a new test, important for keeping dimension names in order + @given(st.data(), st.lists(st.integers())) + def test_ordering_preserved(self, data, seq): + subsequence_of_dims = data.draw(subsequences_of(seq)) + assert sorted(subsequence_of_dims) == subsequence_of_dims + + @pytest.mark.xfail class TestDataArraysStrategy: @given(dataarrays()) From e21555a05094790dbf44926f65eb7c6462c3f955 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 15 Aug 2022 13:41:05 -0400 Subject: [PATCH 023/155] coordinate_variables generates non-dimensional coords --- xarray/testing/strategies.py | 91 ++++++++++++++++++++++++++------- xarray/tests/test_strategies.py | 30 +++++++++-- 2 files changed, 99 insertions(+), 22 deletions(-) diff 
--git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index fcebd06f75b..489434eb99b 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -87,7 +87,7 @@ def np_arrays( return draw(npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype))) -names = st.text(alphabet=string.ascii_lowercase, min_size=1) +names = st.text(alphabet=string.ascii_lowercase, min_size=1, max_size=3) names.__doc__ = """Generates arbitrary string names for dimensions / variables.""" @@ -117,8 +117,8 @@ def dimension_names( def dimension_sizes( min_ndims: int = 0, max_ndims: int = 3, - min_length=1, - max_length=None, + min_length: int = 1, + max_length: int = None, ) -> st.SearchStrategy[Mapping[str, int]]: """ Generates an arbitrary mapping from dimension names to lengths. @@ -130,13 +130,13 @@ def dimension_sizes( Default is 1. max_ndims: int, optional Maximum number of dimensions in generated list. - Default is 3 + Default is 3. min_length: int, optional Minimum size of a dimension. Default is 1. max_length: int, optional Minimum size of a dimension. - Default is `min_size + 5` + Default is `min_length` + 5. 
""" if max_length is None: @@ -243,12 +243,12 @@ def variables( El = TypeVar("El") -# All from the unfinished PR https://github.com/HypothesisWorks/hypothesis/pull/1533 +# Mostly from the unfinished PR https://github.com/HypothesisWorks/hypothesis/pull/1533 # TODO Should move this function upstream by opening new PR @st.composite def subsequences_of( draw: st.DrawFn, - elements: Sequence[El], + elements: Union[Sequence[El], Mapping[str, El]], min_size: int = 0, max_size: int = None, ) -> st.SearchStrategy[Sequence[El]]: @@ -278,24 +278,71 @@ def element_mask() -> List[bool]: assert len(elements) == len(choices) return draw(st.permutations(choices)) - element_includes = zip(elements, element_mask()) - return sorted(element for element, include in element_includes if include) + if isinstance(elements, dict): + element_includes = zip(elements.keys(), elements.values(), element_mask()) + return {k: v for k, v, include in element_includes if include} + else: + element_includes = zip(elements, element_mask()) + return sorted(element for element, include in element_includes if include) @st.composite def _alignable_variables( draw: st.DrawFn, - dim_sizes: st.SearchStrategy[Mapping[str, int]], + dim_sizes: Mapping[str, int], ) -> st.SearchStrategy[List[xr.Variable]]: - dims = draw(subsequences_of(dim_sizes)) - return st.lists(variables(dims=dims)) + """Generates lists of variables with compatible (i.e. 
alignable) dimensions and sizes.""" + alignable_dim_sizes = subsequences_of(dim_sizes) + # TODO don't hard code max number of variables + return draw(st.lists(variables(dims=alignable_dim_sizes), max_size=3)) +@st.composite def coordinate_variables( - dims: st.SearchStrategy[List[str]], -) -> st.SearchStrategy[List[xr.Variable]]: - # TODO specifically generate dimension coordinates - return _alignable_variables(dims) + draw: st.DrawFn, + dim_sizes: Mapping[str, int], +) -> st.SearchStrategy[Mapping[str, xr.Variable]]: + """ + Generates dicts of alignable Variable objects for use as coordinates. + + Differs from data_variables strategy in that it deliberately creates dimension coordinates + (i.e. 1D variables with the same name as a dimension) as well as non-dimension coordinates. + + Parameters + ---------- + dim_sizes + """ + dim_names = list(dim_sizes.keys()) + + all_coords = {} + + # Possibly generate 1D "dimension coordinates" - explicit possibility not to include amy helps with shrinking + if st.booleans(): + # TODO specifically generate dimension coordinates + # TODO first generate subset of dimension names + # TODO then generate 1D variables for each name + ... + + # Possibly generate ND "non-dimension coordinates" - explicit possibility not to include any helps with shrinking + if st.booleans(): + non_dim_coord_vars = draw(_alignable_variables(dim_sizes=dim_sizes)) + + # can't have same name as a dimension + valid_non_dim_coord_names = names.filter(lambda n: n not in dim_names) + # TODO do I actually need to draw from st.lists for this? 
+ non_dim_coord_names = draw( + st.lists( + valid_non_dim_coord_names, + min_size=len(non_dim_coord_vars), + max_size=len(non_dim_coord_vars), + unique=True, + ) + ) + + non_dim_coords = {n: c for n, c in zip(non_dim_coord_names, non_dim_coord_vars)} + all_coords.update(non_dim_coords) + + return all_coords @st.composite @@ -324,14 +371,16 @@ def dataarrays( raise NotImplementedError() else: data = draw(np_arrays()) - dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) - coords = draw(coordinate_variables(dims=dims)) + dim_names = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + dim_sizes = {n: l for n, l in zip(dim_names, data.shape)} + print(dim_sizes) + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) return xr.DataArray( data=convert(data), coords=coords, name=name, - dims=dims, + dims=dim_names, attrs=attrs, ) @@ -339,6 +388,10 @@ def dataarrays( def data_variables( dims: st.SearchStrategy[List[str]], ) -> st.SearchStrategy[List[xr.Variable]]: + """ + Generates dicts of alignable Variable objects for use as Dataset data variables. + """ + # TODO these shouldn't have the same name as any dimensions or any coordinates... 
return _alignable_variables(dims) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index bf382919442..57452d345d0 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -5,10 +5,13 @@ import pytest from hypothesis import given -from xarray import DataArray, Dataset +from xarray import DataArray, Dataset, merge from xarray.core.variable import Variable from xarray.testing.strategies import ( + coordinate_variables, + data_variables, dataarrays, + datasets, dimension_names, dimension_sizes, np_arrays, @@ -231,13 +234,34 @@ def test_ordering_preserved(self, data, seq): assert sorted(subsequence_of_dims) == subsequence_of_dims -@pytest.mark.xfail +class TestCoordinateVariablesStrategy: + @given(coordinate_variables(dim_sizes={"x": 2, "y": 3})) + def test_alignable(self, coord_vars): + + # TODO there must be a better way of checking align-ability than this + for v in coord_vars.values(): + if "x" in v.dims: + assert v.sizes["x"] == 2 + if "y" in v.dims: + assert v.sizes["y"] == 3 + if not set(v.dims).issubset({"x", "y"}): + assert False, v + + def test_generates_1d_dim_coords(self): + # TODO having a hypothesis.find(strat, predicate) would be very useful here + # see https://github.com/HypothesisWorks/hypothesis/issues/3436#issuecomment-1212369645 + ... + + def test_generates_non_dim_coords(self, coord_vars): + ... + + +# @pytest.mark.xfail class TestDataArraysStrategy: @given(dataarrays()) def test_given_nothing(self, da): print(da) assert isinstance(da, DataArray) - assert False @pytest.mark.xfail From 1688779c373ae83759df96c750bda55ef77b259d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 15 Aug 2022 14:11:26 -0400 Subject: [PATCH 024/155] dataarrays strategy given nothing working! 
--- xarray/testing/strategies.py | 17 +++++++++++------ xarray/tests/test_strategies.py | 14 ++++++++++---- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 489434eb99b..acc7c058def 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -310,7 +310,8 @@ def coordinate_variables( Parameters ---------- - dim_sizes + dim_sizes: Mapping of str to int + Sizes of dimensions to use for coordinates. """ dim_names = list(dim_sizes.keys()) @@ -318,10 +319,15 @@ def coordinate_variables( # Possibly generate 1D "dimension coordinates" - explicit possibility not to include amy helps with shrinking if st.booleans(): - # TODO specifically generate dimension coordinates - # TODO first generate subset of dimension names - # TODO then generate 1D variables for each name - ... + # first generate subset of dimension names - these set which dimension coords will be included + dim_coord_names_and_lengths = draw(subsequences_of(dim_sizes)) + + # then generate 1D variables for each name + dim_coords = { + n: draw(variables(dims=st.just({n: l}))) + for n, l in dim_coord_names_and_lengths.items() + } + all_coords.update(dim_coords) # Possibly generate ND "non-dimension coordinates" - explicit possibility not to include any helps with shrinking if st.booleans(): @@ -373,7 +379,6 @@ def dataarrays( data = draw(np_arrays()) dim_names = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) dim_sizes = {n: l for n, l in zip(dim_names, data.shape)} - print(dim_sizes) coords = draw(coordinate_variables(dim_sizes=dim_sizes)) return xr.DataArray( diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 57452d345d0..62bdf0841ab 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -247,20 +247,26 @@ def test_alignable(self, coord_vars): if not set(v.dims).issubset({"x", "y"}): assert False, v + @given(st.data()) + def 
test_valid_set_of_coords(self, data): + coord_vars = data.draw(coordinate_variables(dim_sizes={"x": 2, "y": 3})) + + arr = data.draw(np_arrays(shape=(2, 3))) + da = DataArray(data=arr, coords=coord_vars, dims=["x", "y"]) + assert isinstance(da, DataArray) + def test_generates_1d_dim_coords(self): - # TODO having a hypothesis.find(strat, predicate) would be very useful here + # TODO having a `hypothesis.find(strat, predicate)` function would be very useful here # see https://github.com/HypothesisWorks/hypothesis/issues/3436#issuecomment-1212369645 ... - def test_generates_non_dim_coords(self, coord_vars): + def test_generates_non_dim_coords(self): ... -# @pytest.mark.xfail class TestDataArraysStrategy: @given(dataarrays()) def test_given_nothing(self, da): - print(da) assert isinstance(da, DataArray) From 0a29d320285d50881d18a357e9f7190f9b8c1a78 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 15 Aug 2022 15:04:18 -0400 Subject: [PATCH 025/155] improved docstrings --- doc/user-guide/testing.rst | 14 ++++++---- xarray/testing/strategies.py | 52 +++++++++++++++++++++++++++++++----- 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 85eda5efd01..75eaa94b857 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -50,13 +50,16 @@ These strategies are accessible in the :py:module::`xarray.testing.strategies` m .. currentmodule:: xarray .. 
autosummary:: - :toctree: generated/ testing.strategies.valid_dtypes testing.strategies.np_arrays + testing.strategies.names testing.strategies.dimension_names + testing.strategies.dimension_sizes testing.strategies.variables + testing.strategies.coordinate_variables testing.strategies.dataarrays + testing.strategies.data_variables testing.strategies.datasets testing.strategies.chunks testing.strategies.chunksizes @@ -67,14 +70,15 @@ Generating Examples To see an example of what each of these strategies might produce, you can call one followed by the `.example()` method, which is a general hypothesis method valid for all strategies +(TODO we should specify a seed to hypothesis so that the docs generate the same examples on every build) + .. ipython:: python import xarray.testing.strategies as xrst - # TODO change this to dataarray once written - xrst.variables().example() - xrst.variables().example() - xrst.variables().example() + xrst.dataarrays().example() + xrst.dataarrays().example() + xrst.dataarrays().example() You can see that calling `.example()` multiple times will generate different examples, giving you an idea of the wide range of data that the xarray strategies can generate. diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index acc7c058def..a6beb238e37 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -15,7 +15,6 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np -import pandas as pd from hypothesis import assume from hypothesis.internal.validation import check_valid_sizes @@ -169,7 +168,7 @@ def variables( Generates arbitrary xarray.Variable objects. Follows the signature of the xarray.Variable constructor, but you can also pass alternative strategies to generate - either numpy-like array data or dimension names. Passing both at once is forbidden. + either numpy-like array data or dimension names. 
Passing nothing will generate a completely arbitrary Variable (backed by a numpy array). @@ -177,11 +176,17 @@ def variables( ---------- data: Strategy generating array-likes, optional Default is to generate numpy data of arbitrary shape, values and dtype. - dims: Strategy which generates sequence of strings, optional + dims: Strategy for generating the dimensions, optional + Can either be a strategy for generating a list of string dimension names, + or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. + If provided in the former form the lengths of the returned Variable will either be determined from the + data argument if given or arbitrarily generated if not. Default is to generate arbitrary dimension names for each axis in data. attrs: Strategy which generates dicts, optional convert: Callable Function which accepts one numpy array and returns one numpy-like array of the same shape. + Applied to the data after it is drawn from the `data` strategy provided. + Useful for converting numpy arrays to other types of arrays, e.g. sparse arrays. Default is a no-op. """ @@ -355,9 +360,7 @@ def coordinate_variables( def dataarrays( draw: st.DrawFn, data: st.SearchStrategy[T_Array] = None, - coords: Union[ - Sequence[Union[xr.DataArray, pd.Index]], Mapping[str, xr.Variable] - ] = None, + coords: Mapping[str, xr.Variable] = None, dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, @@ -365,17 +368,54 @@ def dataarrays( attrs: st.SearchStrategy[Mapping] = None, convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.DataArray]: + """ + Generates arbitrary xarray.DataArray objects. + + Follows the basic signature of the xarray.DataArray constructor, but you can also pass alternative strategies to + generate either numpy-like array data, dimensions, or coordinates. + + Passing nothing will generate a completely arbitrary DataArray (backed by a numpy array). 
+ + Parameters + ---------- + data: Strategy generating array-likes, optional + Default is to generate numpy data of arbitrary shape, values and dtype. + coords: Strategy generating mappings from coordinate names to xr.Variables objects, optional + Default is to generate an arbitrary combination of both dimension and non-dimension coordinates, + with sizes matching data and/or dims, but arbitrary names, dtypes, and values. + dims: Strategy for generating the dimensions, optional + Can either be a strategy for generating a list of string dimension names, + or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. + If provided in the former form the lengths of the returned Variable will either be determined from the + data argument if given or arbitrarily generated if not. + Default is to generate arbitrary dimension names for each axis in data. + name: Strategy for generating a string name, optional + Default is to use the `names` strategy, or to create an unnamed DataArray. + attrs: Strategy which generates dicts, optional + convert: Callable + Function which accepts one numpy array and returns one numpy-like array of the same shape. + Applied to the data after it is drawn from the `data` strategy provided. + Useful for converting numpy arrays to other types of arrays, e.g. sparse arrays. + Default is a no-op. 
+ """ if name is None: name = draw(st.none() | names) + if coords is not None: + raise NotImplementedError() + if data is not None and dims is None: raise NotImplementedError() + elif data is None and dims is not None: raise NotImplementedError() + elif data is not None and dims is None: raise NotImplementedError() + else: + # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both data = draw(np_arrays()) dim_names = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) dim_sizes = {n: l for n, l in zip(dim_names, data.shape)} From 3259849835ebd09d8b289e4333e8289c9e3c0a66 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 15 Aug 2022 16:05:25 -0400 Subject: [PATCH 026/155] datasets strategy works (given nothing) --- xarray/testing/strategies.py | 140 ++++++++++++++++++-------------- xarray/tests/test_strategies.py | 8 +- 2 files changed, 87 insertions(+), 61 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index a6beb238e37..d2b14dd7a58 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -19,9 +19,7 @@ from hypothesis.internal.validation import check_valid_sizes import xarray as xr -from xarray.core.utils import is_dict_like -from . import utils __all__ = [ "valid_dtypes", @@ -350,7 +348,7 @@ def coordinate_variables( ) ) - non_dim_coords = {n: c for n, c in zip(non_dim_coord_names, non_dim_coord_vars)} + non_dim_coords = {n: v for n, v in zip(non_dim_coord_names, non_dim_coord_vars)} all_coords.update(non_dim_coords) return all_coords @@ -379,7 +377,7 @@ def dataarrays( Parameters ---------- data: Strategy generating array-likes, optional - Default is to generate numpy data of arbitrary shape, values and dtype. + Default is to generate numpy data of arbitrary shape, values and dtypes. 
coords: Strategy generating mappings from coordinate names to xr.Variables objects, optional Default is to generate an arbitrary combination of both dimension and non-dimension coordinates, with sizes matching data and/or dims, but arbitrary names, dtypes, and values. @@ -388,13 +386,13 @@ def dataarrays( or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. If provided in the former form the lengths of the returned Variable will either be determined from the data argument if given or arbitrarily generated if not. - Default is to generate arbitrary dimension names for each axis in data. + Default is to generate arbitrary dimension sizes, or arbitrary dimension names for each axis in data. name: Strategy for generating a string name, optional Default is to use the `names` strategy, or to create an unnamed DataArray. attrs: Strategy which generates dicts, optional convert: Callable Function which accepts one numpy array and returns one numpy-like array of the same shape. - Applied to the data after it is drawn from the `data` strategy provided. + Applied to the data after it is drawn from the `data` strategy. Useful for converting numpy arrays to other types of arrays, e.g. sparse arrays. Default is a no-op. """ @@ -430,77 +428,99 @@ def dataarrays( ) +@st.composite def data_variables( - dims: st.SearchStrategy[List[str]], -) -> st.SearchStrategy[List[xr.Variable]]: + draw: st.DrawFn, + dim_sizes: Mapping[str, int], + allowed_names: st.SearchStrategy[str] = None, +) -> st.SearchStrategy[Mapping[str, xr.Variable]]: """ Generates dicts of alignable Variable objects for use as Dataset data variables. + + Parameters + ---------- + dim_sizes: Mapping of str to int + Sizes of dimensions to use for variables. + allowed_names: Strategy generating strings + Allowed names for data variables. Needed to avoid conflict with names of coordinate variables & dimensions. 
""" # TODO these shouldn't have the same name as any dimensions or any coordinates... - return _alignable_variables(dims) - - -@st.composite -def datasets( - draw: st.DrawFn, - create_data: Callable, - *, - min_dims=1, - max_dims=3, - min_size=1, - max_size=3, - min_vars=1, - max_vars=3, -) -> st.SearchStrategy[xr.Dataset]: - - dtypes = st.just(draw(valid_dtypes)) - names = st.text(min_size=1) - sizes = dimension_sizes( - min_size=min_size, max_size=max_size, min_dims=min_dims, max_dims=max_dims - ) + vars = draw(_alignable_variables(dim_sizes=dim_sizes)) + dim_names = list(dim_sizes.keys()) - data_vars = sizes.flatmap( - lambda s: st.dictionaries( - keys=names.filter(lambda n: n not in dict(s)), - values=variables(create_data, sizes=s, dtypes=dtypes), - min_size=min_vars, - max_size=max_vars, + # can't have same name as a dimension + # TODO this is also used in coordinate_variables so refactor it out into separate function + valid_var_names = allowed_names.filter(lambda n: n not in dim_names) + # TODO do I actually need to draw from st.lists for this? 
+ var_names = draw( + st.lists( + valid_var_names, + min_size=len(vars), + max_size=len(vars), + unique=True, ) ) - return xr.Dataset(data_vars=draw(data_vars)) + data_vars = {n: v for n, v in zip(var_names, vars)} + return data_vars -def valid_axis(ndim) -> st.SearchStrategy[Union[None, int]]: - if ndim == 0: - return st.none() | st.just(0) - return st.none() | st.integers(-ndim, ndim - 1) - - -def valid_axes(ndim) -> st.SearchStrategy[Union[None, int, Tuple[int, ...]]]: - return valid_axis(ndim) | npst.valid_tuple_axes(ndim, min_size=1) +@st.composite +def datasets( + draw: st.DrawFn, + data_vars: st.SearchStrategy[Mapping[str, xr.Variable]] = None, + coords: Mapping[str, xr.Variable] = None, + dims: Union[ + st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] + ] = None, + attrs: st.SearchStrategy[Mapping] = None, + convert: Callable[[np.ndarray], T_Array] = lambda a: a, +) -> st.SearchStrategy[xr.Dataset]: + """ + Generates arbitrary xarray.Dataset objects. + Follows the basic signature of the xarray.Dataset constructor, but you can also pass alternative strategies to + generate either numpy-like array data variables, dimensions, or coordinates. -def valid_dim(dims) -> st.SearchStrategy[str]: - if not isinstance(dims, list): - dims = [dims] + Passing nothing will generate a completely arbitrary Dataset (backed by numpy arrays). - ndim = len(dims) - axis = valid_axis(ndim) - return axis.map(lambda axes: utils.valid_dims_from_axes(dims, axes)) + Parameters + ---------- + data_vars: Strategy generating mappings from variable names to xr.Variable objects, optional + Default is to generate an arbitrary combination of compatible variables with sizes matching dims, + but arbitrary names, dtypes, and values. 
+ coords: Strategy generating mappings from coordinate names to xr.Variable objects, optional + Default is to generate an arbitrary combination of both dimension and non-dimension coordinates, + with sizes matching data_vars and/or dims, but arbitrary names, dtypes, and values. + dims: Strategy for generating the dimensions, optional + Can either be a strategy for generating a list of string dimension names, + or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. + If provided in the former form the lengths of the returned Variable will either be determined from the + data argument if given or arbitrarily generated if not. + Default is to generate arbitrary dimension sizes. + attrs: Strategy which generates dicts, optional + convert: Callable + Function which accepts one numpy array and returns one numpy-like array of the same shape. + Applied to the data variables after they are drawn from the `data_vars` strategy. + Useful for converting numpy arrays to other types of arrays, e.g. sparse arrays. + Default is a no-op. 
+ """ + if any(arg is not None for arg in [data_vars, coords, dims, attrs]): + raise NotImplementedError() + else: + # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both + dim_sizes = draw(dimension_sizes()) + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + coord_names = list(coords.keys()) + data_var_names = names.filter(lambda n: n not in coord_names) + data_vars = draw( + data_variables(dim_sizes=dim_sizes, allowed_names=data_var_names) + ) -def valid_dims(dims) -> st.SearchStrategy[xr.DataArray]: - if is_dict_like(dims): - dims = list(dims.keys()) - elif isinstance(dims, tuple): - dims = list(dims) - elif not isinstance(dims, list): - dims = [dims] + # TODO convert data_vars - ndim = len(dims) - axes = valid_axes(ndim) - return axes.map(lambda axes: utils.valid_dims_from_axes(dims, axes)) + return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) @st.composite diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 62bdf0841ab..ade995be2c0 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -5,7 +5,7 @@ import pytest from hypothesis import given -from xarray import DataArray, Dataset, merge +from xarray import DataArray, Dataset from xarray.core.variable import Variable from xarray.testing.strategies import ( coordinate_variables, @@ -270,6 +270,12 @@ def test_given_nothing(self, da): assert isinstance(da, DataArray) +class TestDatasetsStrategy: + @given(datasets()) + def test_given_nothing(self, ds): + assert isinstance(ds, Dataset) + + @pytest.mark.xfail @given(st.data()) def test_chained_chunking_example(data): From d76e5b6b6efc908a1eea0eb55e999133fcc0642a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Aug 2022 20:08:37 +0000 Subject: [PATCH 027/155] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci --- xarray/testing/strategies.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index d2b14dd7a58..a327db8b365 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -20,7 +20,6 @@ import xarray as xr - __all__ = [ "valid_dtypes", "np_arrays", From c25940c1820c4bedb0c832d0fa3b2c94b5dd6cff Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 15 Aug 2022 20:46:34 -0400 Subject: [PATCH 028/155] pass dims or data to dataarrays() strategy --- doc/user-guide/testing.rst | 52 +++++++++++++++++++++++---------- xarray/testing/strategies.py | 43 +++++++++++++++++++++++---- xarray/tests/test_strategies.py | 30 +++++++++++++++++++ 3 files changed, 105 insertions(+), 20 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 75eaa94b857..b56bb457f99 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -12,13 +12,6 @@ Testing your code np.random.seed(123456) -.. _asserts: - -Asserts -------- - -TODO - .. _hypothesis: Hypothesis testing @@ -67,8 +60,8 @@ These strategies are accessible in the :py:module::`xarray.testing.strategies` m Generating Examples ~~~~~~~~~~~~~~~~~~~ -To see an example of what each of these strategies might produce, you can call one followed by the `.example()` method, -which is a general hypothesis method valid for all strategies +To see an example of what each of these strategies might produce, you can call one followed by the ``.example()`` method, +which is a general hypothesis method valid for all strategies. 
(TODO we should specify a seed to hypothesis so that the docs generate the same examples on every build) @@ -80,14 +73,22 @@ which is a general hypothesis method valid for all strategies xrst.dataarrays().example() xrst.dataarrays().example() -You can see that calling `.example()` multiple times will generate different examples, giving you an idea of the wide +You can see that calling ``.example()`` multiple times will generate different examples, giving you an idea of the wide range of data that the xarray strategies can generate. -# TODO simple test example +In your tests however you should not use ``.example()`` - instead you should parameterize your tests with the +``hypothesis.given`` decorator: + +# TODO finishsimple test example .. ipython:: python - import hypothesis.strategies as st + from hypothesis import given + + @given(xrst.dataarrays()) + def test_something(da): + ... + Chaining Strategies ~~~~~~~~~~~~~~~~~~~ @@ -96,8 +97,12 @@ Xarray's strategies can accept other strategies as arguments, allowing you to cu examples. .. ipython:: python + :okexcept: - xrst.variables(data=xrst.np_arrays(shape=(3, 4))) + # generate a DataArray with shape (3, 4), but all other details still arbitrary + xrst.dataarrays( + data=xrst.np_arrays(shape=(3, 4), dtype=np.dtype("int32")) + ).example() This also works with strategies defined in other packages, for example the ``chunks`` strategy defined in ``dask.array.strategies``. @@ -107,10 +112,11 @@ Fixing Arguments ~~~~~~~~~~~~~~~~ If you want to fix one aspect of the data structure, whilst allowing variation in the generated examples -over all other aspects, then use ``st.just()``. +over all other aspects, then use ``hypothesis.strategies.just()``. .. ipython:: python - :okexcept: + + import hypothesis.strategies as st # Generates only dataarrays with dimensions ["x", "y"] xrst.dataarrays(dims=st.just(["x", "y"])).example() @@ -118,6 +124,22 @@ over all other aspects, then use ``st.just()``. 
(This is technically another example of chaining strategies - ``hypothesis.strategies.just`` is simply a special strategy that just contains a single example.) +To fix the length of dimensions you can instead pass `dims` as a mapping of dimension names to lengths +(i.e. following xarray object's ``.sizes()`` property), e.g. + +.. ipython:: python + + # Generates only dataarrays with dimensions ["x", "y"], of lengths 2 & 3 respectively + xrst.dataarrays(dims=st.just({"x": 2, "y": 3})).example() + +You can also use this to specify that you want examples which are missing some part of the data structure, for instance + +.. ipython:: python + :okexcept: + + # Generates only dataarrays with no coordinates + xrst.dataarrays(coords=st.just({})).example() + Duck-type Conversion ~~~~~~~~~~~~~~~~~~~~ diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index d2b14dd7a58..cf7c08f7d0d 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -20,7 +20,6 @@ import xarray as xr - __all__ = [ "valid_dtypes", "np_arrays", @@ -286,6 +285,7 @@ def element_mask() -> List[bool]: return {k: v for k, v, include in element_includes if include} else: element_includes = zip(elements, element_mask()) + # TODO this sorted call doesn't actually guarantee elements are sorted in same order they were supplied in return sorted(element for element, include in element_includes if include) @@ -404,13 +404,44 @@ def dataarrays( raise NotImplementedError() if data is not None and dims is None: - raise NotImplementedError() + # no dims -> generate dims to match data + data = draw(data) + dim_names = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + dim_sizes = {n: l for n, l in zip(dim_names, data.shape)} + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) elif data is None and dims is not None: - raise NotImplementedError() + # no data -> generate data to match dims + dims = draw(dims) + if isinstance(dims, List): + dim_names = dims + 
valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) + data = draw(np_arrays(shape=draw(valid_shapes))) + dim_sizes = {n: l for n, l in zip(dims, data.shape)} + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) - elif data is not None and dims is None: - raise NotImplementedError() + else: + # should be a mapping of form {dim_names: lengths} + dim_names, shape = list(dims.keys()), tuple(dims.values()) + data = draw(np_arrays(shape=shape)) + coords = draw(coordinate_variables(dim_sizes=dims)) + + elif data is not None and dims is not None: + # both data and dims provided -> check drawn examples are compatible + dims = draw(dims) + if isinstance(dims, List): + dim_names = dims + data = draw(data) + assume(data.ndim == len(dims)) + dim_sizes = {n: l for n, l in zip(dims, data.shape)} + else: + # should be a mapping of form {dim_names: lengths} + data = draw(data) + dim_sizes = dims + dim_names, shape = list(dims.keys()), tuple(dims.values()) + assume(data.shape == shape) + + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) else: # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both @@ -511,9 +542,11 @@ def datasets( else: # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both dim_sizes = draw(dimension_sizes()) + # TODO allow for no coordinate variables coords = draw(coordinate_variables(dim_sizes=dim_sizes)) coord_names = list(coords.keys()) data_var_names = names.filter(lambda n: n not in coord_names) + # TODO allow for no data variables data_vars = draw( data_variables(dim_sizes=dim_sizes, allowed_names=data_var_names) ) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index ade995be2c0..8d9b3e085de 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -269,6 +269,36 @@ class TestDataArraysStrategy: def test_given_nothing(self, da): assert isinstance(da, 
DataArray) + @given(st.data()) + def test_given_dims(self, data): + da = data.draw(dataarrays(dims=st.just(["x", "y"]))) + assert da.dims == ("x", "y") + + da = data.draw(dataarrays(dims=st.just({"x": 2, "y": 3}))) + assert da.sizes == {"x": 2, "y": 3} + + @given(st.data()) + def test_given_data(self, data): + shape = (2, 3) + arrs = np_arrays(shape=shape) + da = data.draw(dataarrays(data=arrs)) + + assert da.shape == shape + + @given(st.data()) + def test_given_data_and_dims(self, data): + arrs = np_arrays(shape=(2, 3)) + dims = dimension_names(min_ndims=2) + da = data.draw(dataarrays(data=arrs, dims=dims)) + + assert da.shape == (2, 3) + + arrs = np_arrays(shape=(3, 4)) + dims = st.just({"x": 3, "y": 4}) + da = data.draw(dataarrays(data=arrs, dims=dims)) + + assert da.sizes == {"x": 3, "y": 4} + class TestDatasetsStrategy: @given(datasets()) From cd7b0654b7f9f8fbbc12dfb49e4b71218e247542 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 16 Aug 2022 09:58:22 -0400 Subject: [PATCH 029/155] importorskip hypothesis in tests --- xarray/testing/strategies.py | 20 ++++++++++++++++++++ xarray/tests/test_strategies.py | 8 ++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index cf7c08f7d0d..ebd9532de40 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -46,6 +46,8 @@ def elements(dtype) -> st.SearchStrategy[Any]: """ Generates scalar elements to go in a numpy-like array. + + Requires the hypothesis package to be installed. """ max_value = 100 min_value = 0 if dtype.kind == "u" else -max_value @@ -64,6 +66,8 @@ def np_arrays( """ Generates arbitrary numpy arrays with xarray-compatible dtypes. + Requires the hypothesis package to be installed. + Parameters ---------- shape @@ -94,6 +98,8 @@ def dimension_names( """ Generates an arbitrary list of valid dimension names. + Requires the hypothesis package to be installed. 
+ Parameters ---------- min_ndims @@ -119,6 +125,8 @@ def dimension_sizes( """ Generates an arbitrary mapping from dimension names to lengths. + Requires the hypothesis package to be installed. + Parameters ---------- min_ndims: int, optional @@ -169,6 +177,8 @@ def variables( Passing nothing will generate a completely arbitrary Variable (backed by a numpy array). + Requires the hypothesis package to be installed. + Parameters ---------- data: Strategy generating array-likes, optional @@ -259,6 +269,8 @@ def subsequences_of( Order is guaranteed to be preserved in the result. + Requires the hypothesis package to be installed. + Parameters ---------- elements: Elements from which to construct the subsequence @@ -311,6 +323,8 @@ def coordinate_variables( Differs from data_variables strategy in that it deliberately creates dimension coordinates (i.e. 1D variables with the same name as a dimension) as well as non-dimension coordinates. + Requires the hypothesis package to be installed. + Parameters ---------- dim_sizes: Mapping of str to int @@ -374,6 +388,8 @@ def dataarrays( Passing nothing will generate a completely arbitrary DataArray (backed by a numpy array). + Requires the hypothesis package to be installed. + Parameters ---------- data: Strategy generating array-likes, optional @@ -468,6 +484,8 @@ def data_variables( """ Generates dicts of alignable Variable objects for use as Dataset data variables. + Requires the hypothesis package to be installed. + Parameters ---------- dim_sizes: Mapping of str to int @@ -515,6 +533,8 @@ def datasets( Passing nothing will generate a completely arbitrary Dataset (backed by numpy arrays). + Requires the hypothesis package to be installed. 
+ Parameters ---------- data_vars: Strategy generating mappings from variable names to xr.Variable objects, optional diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 8d9b3e085de..b85342b0902 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -1,8 +1,12 @@ -import hypothesis.extra.numpy as npst -import hypothesis.strategies as st import numpy as np import numpy.testing as npt import pytest + +pytest.importorskip("hypothesis") +# isort: split + +import hypothesis.extra.numpy as npst +import hypothesis.strategies as st from hypothesis import given from xarray import DataArray, Dataset From 8e548b13518b1db31e48740ae088f952e4912a1c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 16 Aug 2022 14:00:20 +0000 Subject: [PATCH 030/155] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/user-guide/testing.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index b56bb457f99..40eb5f99f05 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -85,6 +85,7 @@ In your tests however you should not use ``.example()`` - instead you should par from hypothesis import given + @given(xrst.dataarrays()) def test_something(da): ... 
From d1487d481da0292c122ee51a7383f0fe3a4079e7 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 16 Aug 2022 10:49:57 -0400 Subject: [PATCH 031/155] added warning about inefficient example generation --- doc/user-guide/testing.rst | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index b56bb457f99..da0d5ac26de 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -85,6 +85,8 @@ In your tests however you should not use ``.example()`` - instead you should par from hypothesis import given +.. ipython:: python + @given(xrst.dataarrays()) def test_something(da): ... @@ -107,6 +109,35 @@ examples. This also works with strategies defined in other packages, for example the ``chunks`` strategy defined in ``dask.array.strategies``. +.. warning:: + Passing multiple different strategies to the same constructor can lead to poor example generation performance. + + This is because in order to construct a valid xarray object to return, our strategies must check that the + variables / dimensions / coordinates are mutually compatible. We do this using ``hypothesis.assume``, which throws + away any generated examples not meeting the required condition. + + Therefore if you pass multiple custom strategies to a strategy constructor which are not compatible in enough cases, + most of the examples they generate will be mutually incompatible. This will likely lead to poor example generation + performance, manifesting as a ``hypothesis.errors.FailedHealthCheck`` being raised. For example: + + .. code-block:: + + @given(st.data()) + def test_something_else_inefficiently(data): + arrs = xrst.np_arrays() # generates arrays of any shape + dims = xrst.dimension_names() # generates lists of any number of dimensions + + # Drawing examples from this strategy is likely to have poor performance + var = data.draw(xrst.variables(data=arrs, dims=dims)) + + assert ... 
+ + Here we have passed custom strategies which won't often be compatible: only rarely will the array's ``ndims`` + correspond to the number of dimensions drawn. + + To avoid this problem either allow xarray's strategies to automatically generate compatible data for you, or be more + selective about cases when passing multiple custom strategies to the same constructor. + Fixing Arguments ~~~~~~~~~~~~~~~~ @@ -125,7 +156,7 @@ over all other aspects, then use ``hypothesis.strategies.just()``. strategy that just contains a single example.) To fix the length of dimensions you can instead pass `dims` as a mapping of dimension names to lengths -(i.e. following xarray object's ``.sizes()`` property), e.g. +(i.e. following xarray objects' ``.sizes()`` property), e.g. .. ipython:: python From 8bac610e25fa39d80065599533467b6079a68b0f Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 17 Aug 2022 15:41:25 -0400 Subject: [PATCH 032/155] remove TODO about deterministic examples in docs --- doc/user-guide/testing.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index da0d5ac26de..a26a342f4fc 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -63,8 +63,6 @@ Generating Examples To see an example of what each of these strategies might produce, you can call one followed by the ``.example()`` method, which is a general hypothesis method valid for all strategies. -(TODO we should specify a seed to hypothesis so that the docs generate the same examples on every build) - .. 
ipython:: python import xarray.testing.strategies as xrst From cf3beb58d33ed369ab587323354bd14373d18c03 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 17 Aug 2022 15:44:50 -0400 Subject: [PATCH 033/155] un-restrict names strategy --- xarray/testing/strategies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index ebd9532de40..a8c8ce32253 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -87,7 +87,7 @@ def np_arrays( return draw(npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype))) -names = st.text(alphabet=string.ascii_lowercase, min_size=1, max_size=3) +names = st.text(st.characters(), min_size=1) names.__doc__ = """Generates arbitrary string names for dimensions / variables.""" From d991357860c1395c8aefb7d73c6508107c1a29b1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 17 Aug 2022 19:46:58 +0000 Subject: [PATCH 034/155] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/testing/strategies.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index a8c8ce32253..a663203a713 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,3 @@ -import string from typing import ( Any, Callable, From a6405cff9e9137f72dfbd506de46b33c7ef66a25 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 17 Aug 2022 15:53:38 -0400 Subject: [PATCH 035/155] removed convert kwarg --- doc/user-guide/testing.rst | 6 +++--- xarray/testing/strategies.py | 38 +++------------------------------ xarray/tests/test_strategies.py | 7 ------ 3 files changed, 6 insertions(+), 45 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index a26a342f4fc..018c17c5aad 100644 --- a/doc/user-guide/testing.rst +++ 
b/doc/user-guide/testing.rst @@ -170,7 +170,7 @@ You can also use this to specify that you want examples which are missing some p xrst.dataarrays(coords=st.just({})).example() -Duck-type Conversion -~~~~~~~~~~~~~~~~~~~~ +Creating Duck-type Arrays +~~~~~~~~~~~~~~~~~~~~~~~~~ -# TODO converting to duckarrays +# TODO creating duckarrays by passing custom strategies to data arg diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index a8c8ce32253..dae42c38d77 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,16 +1,4 @@ -import string -from typing import ( - Any, - Callable, - List, - Mapping, - Optional, - Sequence, - Set, - Tuple, - TypeVar, - Union, -) +from typing import Any, List, Mapping, Optional, Sequence, Set, Tuple, TypeVar, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -167,7 +155,6 @@ def variables( st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, attrs: st.SearchStrategy[Mapping] = None, - convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.Variable]: """ Generates arbitrary xarray.Variable objects. @@ -190,11 +177,6 @@ def variables( data argument if given or arbitrarily generated if not. Default is to generate arbitrary dimension names for each axis in data. attrs: Strategy which generates dicts, optional - convert: Callable - Function which accepts one numpy array and returns one numpy-like array of the same shape. - Applied to the data after it is drawn from the `data` strategy provided. - Useful for converting numpy arrays to other types of arrays, e.g. sparse arrays. - Default is a no-op. """ if any( @@ -249,7 +231,7 @@ def variables( # TODO autogenerate some attributes ... 
- return xr.Variable(dims=dims, data=convert(data), attrs=attrs) + return xr.Variable(dims=dims, data=data, attrs=attrs) El = TypeVar("El") @@ -378,7 +360,6 @@ def dataarrays( ] = None, name: st.SearchStrategy[Union[str, None]] = None, attrs: st.SearchStrategy[Mapping] = None, - convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.DataArray]: """ Generates arbitrary xarray.DataArray objects. @@ -406,11 +387,6 @@ def dataarrays( name: Strategy for generating a string name, optional Default is to use the `names` strategy, or to create an unnamed DataArray. attrs: Strategy which generates dicts, optional - convert: Callable - Function which accepts one numpy array and returns one numpy-like array of the same shape. - Applied to the data after it is drawn from the `data` strategy. - Useful for converting numpy arrays to other types of arrays, e.g. sparse arrays. - Default is a no-op. """ if name is None: @@ -467,7 +443,7 @@ def dataarrays( coords = draw(coordinate_variables(dim_sizes=dim_sizes)) return xr.DataArray( - data=convert(data), + data=data, coords=coords, name=name, dims=dim_names, @@ -523,7 +499,6 @@ def datasets( st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, attrs: st.SearchStrategy[Mapping] = None, - convert: Callable[[np.ndarray], T_Array] = lambda a: a, ) -> st.SearchStrategy[xr.Dataset]: """ Generates arbitrary xarray.Dataset objects. @@ -550,11 +525,6 @@ def datasets( data argument if given or arbitrarily generated if not. Default is to generate arbitrary dimension sizes. attrs: Strategy which generates dicts, optional - convert: Callable - Function which accepts one numpy array and returns one numpy-like array of the same shape. - Applied to the data variables after they are drawn from the `data_vars` strategy. - Useful for converting numpy arrays to other types of arrays, e.g. sparse arrays. - Default is a no-op. 
""" if any(arg is not None for arg in [data_vars, coords, dims, attrs]): @@ -571,8 +541,6 @@ def datasets( data_variables(dim_sizes=dim_sizes, allowed_names=data_var_names) ) - # TODO convert data_vars - return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index b85342b0902..25034280814 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -148,13 +148,6 @@ def test_given_fixed_sizes_and_arbitrary_data(self, data): assert var.shape == (2, 3) - @given(st.data()) - def test_convert(self, data): - arr = st.just(np.asarray([1, 2, 3])) - var = data.draw(variables(data=arr, convert=lambda x: x * 2)) - - npt.assert_equal(var.data, np.asarray([2, 4, 6])) - # All from the unfinished PR https://github.com/HypothesisWorks/hypothesis/pull/1533 class TestSubsequencesOfStrategy: From 3609a3404fb78cf5f59ef7e348055d351234339e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 17 Aug 2022 19:55:50 +0000 Subject: [PATCH 036/155] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/testing/strategies.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 1b57b09a579..dae42c38d77 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,14 +1,4 @@ -from typing import ( - Any, - List, - Mapping, - Optional, - Sequence, - Set, - Tuple, - TypeVar, - Union, -) +from typing import Any, List, Mapping, Optional, Sequence, Set, Tuple, TypeVar, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st From 63ad529d4b02e3aeebed16f48c3791b6149700e0 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 17 Aug 2022 16:47:04 -0400 Subject: [PATCH 037/155] avoid using subsequences_of --- 
xarray/testing/strategies.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 1b57b09a579..37ecf5e00d1 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,14 +1,4 @@ -from typing import ( - Any, - List, - Mapping, - Optional, - Sequence, - Set, - Tuple, - TypeVar, - Union, -) +from typing import Any, List, Mapping, Optional, Sequence, Set, Tuple, TypeVar, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -299,9 +289,17 @@ def _alignable_variables( dim_sizes: Mapping[str, int], ) -> st.SearchStrategy[List[xr.Variable]]: """Generates lists of variables with compatible (i.e. alignable) dimensions and sizes.""" - alignable_dim_sizes = subsequences_of(dim_sizes) + + # TODO refactor this out into separate function + if dim_sizes: + dims = list(dim_sizes.keys()) + subset_dims = draw(st.lists(st.sampled_from(dims), unique=True)) + alignable_dim_sizes = {d: dim_sizes[d] for d in subset_dims} + else: + alignable_dim_sizes = {} + # TODO don't hard code max number of variables - return draw(st.lists(variables(dims=alignable_dim_sizes), max_size=3)) + return draw(st.lists(variables(dims=st.just(alignable_dim_sizes)), max_size=3)) @st.composite @@ -327,9 +325,10 @@ def coordinate_variables( all_coords = {} # Possibly generate 1D "dimension coordinates" - explicit possibility not to include amy helps with shrinking - if st.booleans(): + if dim_names and st.booleans(): # first generate subset of dimension names - these set which dimension coords will be included - dim_coord_names_and_lengths = draw(subsequences_of(dim_sizes)) + subset_dims = draw(st.lists(st.sampled_from(dim_names), unique=True)) + dim_coord_names_and_lengths = {d: dim_sizes[d] for d in subset_dims} # then generate 1D variables for each name dim_coords = { @@ -344,7 +343,6 @@ def coordinate_variables( # can't have same name as a dimension 
valid_non_dim_coord_names = names.filter(lambda n: n not in dim_names) - # TODO do I actually need to draw from st.lists for this? non_dim_coord_names = draw( st.lists( valid_non_dim_coord_names, From 4ffbcbd5e5726060bb02d4fee9ed504b6e93b19e Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 17 Aug 2022 16:59:11 -0400 Subject: [PATCH 038/155] refactored into separate function for unique subset of dims --- xarray/testing/strategies.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 37ecf5e00d1..71edbc1fc75 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,15 @@ -from typing import Any, List, Mapping, Optional, Sequence, Set, Tuple, TypeVar, Union +from typing import ( + Any, + Hashable, + List, + Mapping, + Optional, + Sequence, + Set, + Tuple, + TypeVar, + Union, +) import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -283,6 +294,14 @@ def element_mask() -> List[bool]: return sorted(element for element, include in element_includes if include) +@st.composite +def _unique_subset_of( + draw: st.DrawFn, d: Mapping[Hashable, Any] +) -> st.SearchStrategy[Mapping[Hashable, Any]]: + subset_keys = draw(st.lists(st.sampled_from(list(d.keys())), unique=True)) + return {k: d[k] for k in subset_keys} + + @st.composite def _alignable_variables( draw: st.DrawFn, @@ -290,13 +309,7 @@ def _alignable_variables( ) -> st.SearchStrategy[List[xr.Variable]]: """Generates lists of variables with compatible (i.e. 
alignable) dimensions and sizes.""" - # TODO refactor this out into separate function - if dim_sizes: - dims = list(dim_sizes.keys()) - subset_dims = draw(st.lists(st.sampled_from(dims), unique=True)) - alignable_dim_sizes = {d: dim_sizes[d] for d in subset_dims} - else: - alignable_dim_sizes = {} + alignable_dim_sizes = draw(_unique_subset_of(dim_sizes)) if dim_sizes else {} # TODO don't hard code max number of variables return draw(st.lists(variables(dims=st.just(alignable_dim_sizes)), max_size=3)) @@ -327,8 +340,7 @@ def coordinate_variables( # Possibly generate 1D "dimension coordinates" - explicit possibility not to include amy helps with shrinking if dim_names and st.booleans(): # first generate subset of dimension names - these set which dimension coords will be included - subset_dims = draw(st.lists(st.sampled_from(dim_names), unique=True)) - dim_coord_names_and_lengths = {d: dim_sizes[d] for d in subset_dims} + dim_coord_names_and_lengths = draw(_unique_subset_of(dim_sizes)) # then generate 1D variables for each name dim_coords = { From 469482df933beefa322b8b2ab98ad452311a29f5 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 17 Aug 2022 17:01:40 -0400 Subject: [PATCH 039/155] removed subsequences_of --- xarray/testing/strategies.py | 62 +----------------------- xarray/tests/test_strategies.py | 83 --------------------------------- 2 files changed, 1 insertion(+), 144 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 71edbc1fc75..f6a9a7ea575 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,15 +1,4 @@ -from typing import ( - Any, - Hashable, - List, - Mapping, - Optional, - Sequence, - Set, - Tuple, - TypeVar, - Union, -) +from typing import Any, Hashable, List, Mapping, Optional, Set, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -245,55 +234,6 @@ def variables( return xr.Variable(dims=dims, data=data, attrs=attrs) -El = 
TypeVar("El") - - -# Mostly from the unfinished PR https://github.com/HypothesisWorks/hypothesis/pull/1533 -# TODO Should move this function upstream by opening new PR -@st.composite -def subsequences_of( - draw: st.DrawFn, - elements: Union[Sequence[El], Mapping[str, El]], - min_size: int = 0, - max_size: int = None, -) -> st.SearchStrategy[Sequence[El]]: - """ - Returns a strategy which generates sub-sequences of the input sequence. - - Order is guaranteed to be preserved in the result. - - Requires the hypothesis package to be installed. - - Parameters - ---------- - elements: Elements from which to construct the subsequence - min_size: int - Minimum size of the returned subsequences. - Default is 0. - max_size: int, optional - Maximum size of the returned subsequences. - Default is the full size of the input sequence. - """ - if max_size is None: - max_size = len(elements) - check_valid_sizes(min_size, max_size) - - def element_mask() -> List[bool]: - num_include = draw(st.integers(min_size, max_size)) - num_exclude = len(elements) - num_include - choices = [True] * num_include + [False] * num_exclude - assert len(elements) == len(choices) - return draw(st.permutations(choices)) - - if isinstance(elements, dict): - element_includes = zip(elements.keys(), elements.values(), element_mask()) - return {k: v for k, v, include in element_includes if include} - else: - element_includes = zip(elements, element_mask()) - # TODO this sorted call doesn't actually guarantee elements are sorted in same order they were supplied in - return sorted(element for element, include in element_includes if include) - - @st.composite def _unique_subset_of( draw: st.DrawFn, d: Mapping[Hashable, Any] diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 25034280814..dd6975dc1b9 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -19,7 +19,6 @@ dimension_names, dimension_sizes, np_arrays, - subsequences_of, valid_dtypes, 
variables, ) @@ -149,88 +148,6 @@ def test_given_fixed_sizes_and_arbitrary_data(self, data): assert var.shape == (2, 3) -# All from the unfinished PR https://github.com/HypothesisWorks/hypothesis/pull/1533 -class TestSubsequencesOfStrategy: - @pytest.mark.xfail( - reason="Can't work out how to import assert_no_examples from hypothesis.tests.common.debug" - ) - def test_subsequence_of_empty(self): - sub_seq_strat = st.lists(st.none(), max_size=0) - assert_no_examples(sub_seq_strat) - - @given(st.data(), st.lists(st.integers())) - def test_subsequence_sizing(self, data, seq): - sub_seq_strat = subsequences_of(seq) - sub_seq = data.draw(sub_seq_strat) - - assert isinstance(sub_seq, list) - assert len(sub_seq) <= len(seq) - - @given(st.data(), st.lists(st.integers())) - def test_subsequence_only_original_elements(self, data, seq): - sub_seq_strat = subsequences_of(seq) - sub_seq = data.draw(sub_seq_strat) - - assert isinstance(sub_seq, list) - assert len(sub_seq) <= len(seq) - - @given(st.data(), st.lists(st.integers())) - def test_subsequence_elements_not_over_drawn(self, data, seq): - sub_seq_strat = subsequences_of(seq) - sub_seq = data.draw(sub_seq_strat) - - assert not (set(sub_seq) - set(seq)) - - @given(st.data(), st.lists(st.integers())) - def test_subsequence_original_elements_not_over_produced(self, data, seq): - sub_seq_strat = subsequences_of(seq) - sub_seq = data.draw(sub_seq_strat) - - # Per unique item, check that they don't occur in the subsequence - # more times that they appear in the source. 
- for item in set(sub_seq): - assert sub_seq.count(item) <= seq.count(item) - - @given(st.data(), st.lists(st.integers())) - def test_subsequence_max_size_constraint(self, data, seq): - max_size_strat = st.integers(min_value=0, max_value=len(seq)) - max_size = data.draw(max_size_strat) - - sub_seq_strat = subsequences_of(seq, max_size=max_size) - sub_seq = data.draw(sub_seq_strat) - - assert len(sub_seq) <= max_size - - @given(st.data(), st.lists(st.integers())) - def test_subsequence_min_size_constraint(self, data, seq): - min_size_strat = st.integers(min_value=0, max_value=len(seq)) - min_size = data.draw(min_size_strat) - - sub_seq_strat = subsequences_of(seq, min_size=min_size) - sub_seq = data.draw(sub_seq_strat) - - assert len(sub_seq) >= min_size - - @given(st.data(), st.lists(st.integers())) - def test_subsequence_min_max_size_constraint(self, data, seq): - min_size_strat = st.integers(min_value=0, max_value=len(seq)) - min_size = data.draw(min_size_strat) - - max_size_strat = st.integers(min_value=min_size, max_value=len(seq)) - max_size = data.draw(max_size_strat) - - sub_seq_strat = subsequences_of(seq, min_size=min_size, max_size=max_size) - sub_seq = data.draw(sub_seq_strat) - - assert min_size <= len(sub_seq) <= max_size - - # this is a new test, important for keeping dimension names in order - @given(st.data(), st.lists(st.integers())) - def test_ordering_preserved(self, data, seq): - subsequence_of_dims = data.draw(subsequences_of(seq)) - assert sorted(subsequence_of_dims) == subsequence_of_dims - - class TestCoordinateVariablesStrategy: @given(coordinate_variables(dim_sizes={"x": 2, "y": 3})) def test_alignable(self, coord_vars): From ced1a9f1172c565c7ec14998cdcd1ddb1b13a6bb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 17 Aug 2022 21:04:09 +0000 Subject: [PATCH 040/155] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- 
xarray/testing/strategies.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 6eb477c73e9..ef532f37b11 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,3 @@ - from typing import Any, Hashable, List, Mapping, Optional, Set, Tuple, Union import hypothesis.extra.numpy as npst From a3c9ad07559963044c3c489d124298d8a52ebf78 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 17 Aug 2022 17:21:31 -0400 Subject: [PATCH 041/155] fix draw(st.booleans()) --- xarray/testing/strategies.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 6eb477c73e9..01fbae00ac9 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,3 @@ - from typing import Any, Hashable, List, Mapping, Optional, Set, Tuple, Union import hypothesis.extra.numpy as npst @@ -278,7 +277,7 @@ def coordinate_variables( all_coords = {} # Possibly generate 1D "dimension coordinates" - explicit possibility not to include amy helps with shrinking - if dim_names and st.booleans(): + if dim_names and draw(st.booleans()): # first generate subset of dimension names - these set which dimension coords will be included dim_coord_names_and_lengths = draw(_unique_subset_of(dim_sizes)) @@ -290,7 +289,7 @@ def coordinate_variables( all_coords.update(dim_coords) # Possibly generate ND "non-dimension coordinates" - explicit possibility not to include any helps with shrinking - if st.booleans(): + if draw(st.booleans()): non_dim_coord_vars = draw(_alignable_variables(dim_sizes=dim_sizes)) # can't have same name as a dimension @@ -492,14 +491,25 @@ def datasets( else: # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both dim_sizes = draw(dimension_sizes()) - # TODO allow for no coordinate variables - coords = 
draw(coordinate_variables(dim_sizes=dim_sizes)) + + # Allow for no coordinate variables - helps with shrinking + if draw(st.booleans()): + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + else: + coords = {} + coord_names = list(coords.keys()) - data_var_names = names.filter(lambda n: n not in coord_names) - # TODO allow for no data variables - data_vars = draw( - data_variables(dim_sizes=dim_sizes, allowed_names=data_var_names) - ) + allowed_data_var_names = names.filter(lambda n: n not in coord_names) + + # Allow for no data variables - helps with shrinking + if draw(st.booleans()): + draw( + data_variables( + dim_sizes=dim_sizes, allowed_names=allowed_data_var_names + ) + ) + else: + data_vars = {} return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) From 404111d0fdf5f9854514f61c9aeeaf7dcc6a3b6f Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 23 Aug 2022 13:46:47 -0400 Subject: [PATCH 042/155] remove all references to chunking until chunks strategy merged upstream in dask --- doc/api.rst | 2 - doc/user-guide/testing.rst | 6 +- xarray/testing/strategies.py | 187 +------------------------------- xarray/tests/test_strategies.py | 15 --- 4 files changed, 3 insertions(+), 207 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 581ab74a6f9..189c5285b8f 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1081,8 +1081,6 @@ Hypothesis Testing Strategies testing.strategies.dataarrays testing.strategies.data_variables testing.strategies.datasets - testing.strategies.chunks - testing.strategies.chunksizes Exceptions ========== diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 018c17c5aad..8b2e2b9fea7 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -54,8 +54,6 @@ These strategies are accessible in the :py:module::`xarray.testing.strategies` m testing.strategies.dataarrays testing.strategies.data_variables testing.strategies.datasets - testing.strategies.chunks - 
testing.strategies.chunksizes Generating Examples ~~~~~~~~~~~~~~~~~~~ @@ -104,8 +102,8 @@ examples. data=xrst.np_arrays(shape=(3, 4), dtype=np.dtype("int32")) ).example() -This also works with strategies defined in other packages, for example the ``chunks`` strategy defined in -``dask.array.strategies``. +This also works with customs strategies, or strategies defined in other packages. +For example you could create a ``chunks`` strategy to specify particular chunking patterns for a dask-backed array. .. warning:: Passing multiple different strategies to the same constructor can lead to poor example generation performance. diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 01fbae00ac9..1411ebf2862 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,4 @@ -from typing import Any, Hashable, List, Mapping, Optional, Set, Tuple, Union +from typing import Any, Hashable, List, Mapping, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -512,188 +512,3 @@ def datasets( data_vars = {} return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) - - -@st.composite -def block_lengths( - draw: st.DrawFn, - ax_length: int, - min_chunk_length: int = 1, - max_chunk_length: Optional[int] = None, -) -> st.SearchStrategy[Tuple[int, ...]]: - """Generate different chunking patterns along one dimension of an array.""" - - chunks = [] - remaining_length = ax_length - while remaining_length > 0: - _max_chunk_length = ( - min(remaining_length, max_chunk_length) - if max_chunk_length - else remaining_length - ) - - if min_chunk_length > _max_chunk_length: - # if we are at the end of the array we have no choice but to use a smaller chunk - chunk = remaining_length - else: - chunk = draw( - st.integers(min_value=min_chunk_length, max_value=_max_chunk_length) - ) - - chunks.append(chunk) - remaining_length = remaining_length - chunk - - return tuple(chunks) - - -# TODO we could remove 
this once dask/9374 is merged upstream -@st.composite -def chunks( - draw: st.DrawFn, - shape: Tuple[int, ...], - axes: Optional[Union[int, Tuple[int, ...]]] = None, - min_chunk_length: int = 1, - max_chunk_length: Optional[int] = None, -) -> st.SearchStrategy[Tuple[Tuple[int, ...], ...]]: - """ - Generates different chunking patterns for an N-D array with a given shape. - - Returns chunking structure as a tuple of tuples of ints, with each inner tuple containing - the block lengths along one dimension of the array. - - You can limit chunking to specific axes using the `axes` kwarg, and specify minimum and - maximum block lengths. - - Requires the hypothesis package to be installed. - - Parameters - ---------- - shape : tuple of ints - Shape of the array for which you want to generate a chunking pattern. - axes : None or int or tuple of ints, optional - ... - min_chunk_length : int, default is 1 - Minimum chunk length to use along all axes. - max_chunk_length: int, optional - Maximum chunk length to use along all axes. - Default is that the chunk can be as long as the length of the array along that axis. 
- - Examples - -------- - Chunking along all axes by default - - >>> chunks(shape=(2, 3)).example() - ((1, 1), (1, 2)) - - Chunking only along the second axis - - >>> chunks(shape=(2, 3), axis=1).example() - ((2,), (1, 1, 1)) - - Minimum size chunks of length 2 along all axes - - >>> chunks(shape=(2, 3), min_chunk_length=2).example() - ((2,), (2, 1)) - - Smallest possible chunks along all axes - - >>> chunks(shape=(2, 3), max_chunk_length=1).example() - ((1, 1), (1, 1, 1)) - - Maximum size chunks along all axes - - >>> chunks(shape=(2, 3), axes=()).example() - ((2,), (3,)) - - See Also - -------- - testing.strategies.chunks - DataArray.chunk - DataArray.chunks - """ - - if min_chunk_length < 1 or not isinstance(min_chunk_length, int): - raise ValueError("min_chunk_length must be an integer >= 1") - - if max_chunk_length: - if max_chunk_length < 1 or not isinstance(min_chunk_length, int): - raise ValueError("max_chunk_length must be an integer >= 1") - - if axes is None: - axes = tuple(range(len(shape))) - elif isinstance(axes, int): - axes = (axes,) - - chunks = [] - for axis, ax_length in enumerate(shape): - - _max_chunk_length = ( - min(max_chunk_length, ax_length) if max_chunk_length else ax_length - ) - - if axes is not None and axis in axes: - block_lengths_along_ax = draw( - block_lengths( - ax_length, - min_chunk_length=min_chunk_length, - max_chunk_length=_max_chunk_length, - ) - ) - else: - # don't chunk along this dimension - block_lengths_along_ax = (ax_length,) - - chunks.append(block_lengths_along_ax) - - return tuple(chunks) - - -@st.composite -def chunksizes( - draw: st.DrawFn, - sizes: Mapping[str, int], - dims: Set[str] = None, - min_chunk_length: int = 1, - max_chunk_length: int = None, -) -> st.SearchStrategy[Mapping[str, Tuple[int, ...]]]: - """ - Generate different chunking patterns for an xarray object with given sizes. 
- - Returns chunking structure as a mapping of dimension names to tuples of ints, - with each tuple containing the block lengths along one dimension of the object. - - You can limit chunking to specific dimensions given by the `dim` kwarg. - - Requires the hypothesis package to be installed. - - Parameters - ---------- - sizes : mapping of dimension names to ints - Size of the object for which you want to generate a chunking pattern. - dims : set of str, optional - Dimensions to chunk along. Default is to chunk along all dimensions. - min_chunk_length : int, default is 1 - Minimum chunk length to use along all dimensions. - max_chunk_length: int, optional - Maximum chunk length to use along all dimensions. - Default is that the chunk can be as long as the length of the array along that dimension. - - See Also - -------- - testing.strategies.chunks - DataArray.chunk - DataArray.chunksizes - DataArray.sizes - """ - shape = tuple(sizes.values()) - axes = tuple(list(sizes.keys()).index(d) for d in dims) if dims else None - _chunks = draw( - chunks( - shape=shape, - axes=axes, - min_chunk_length=min_chunk_length, - max_chunk_length=max_chunk_length, - ) - ) - - return {d: c for d, c in zip(list(sizes.keys()), _chunks)} diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index dd6975dc1b9..7ac43253ef6 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -218,18 +218,3 @@ class TestDatasetsStrategy: @given(datasets()) def test_given_nothing(self, ds): assert isinstance(ds, Dataset) - - -@pytest.mark.xfail -@given(st.data()) -def test_chained_chunking_example(data): - import dask.array.strategies as dast - - def chunk(da): - return da.chunk(dast.chunks(da.shape)) - - chunked_dataarrays = xrst.dataarrays().flatmap(chunk) - - chunked_da = data.draw(chunked_dataarrays()) - - assert ... 
From 3764a7b11c1a8a529ed6b9fbcba238caa6d579f9 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 23 Aug 2022 18:17:56 -0400 Subject: [PATCH 043/155] added example of complicated strategy for dims dict --- doc/user-guide/testing.rst | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 8b2e2b9fea7..00107eeab74 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -102,7 +102,7 @@ examples. data=xrst.np_arrays(shape=(3, 4), dtype=np.dtype("int32")) ).example() -This also works with customs strategies, or strategies defined in other packages. +This also works with custom strategies, or strategies defined in other packages. For example you could create a ``chunks`` strategy to specify particular chunking patterns for a dask-backed array. .. warning:: @@ -167,6 +167,29 @@ You can also use this to specify that you want examples which are missing some p # Generates only dataarrays with no coordinates xrst.dataarrays(coords=st.just({})).example() +Through a combination of chaining strategies and fixing arguments, you can specify quite complicated requirements on the +objects your chained strategy will generate. + +.. ipython:: python + + fixed_x_variable_y_maybe_z = st.fixed_dictionaries( + {"x": st.just(2), "y": st.integers(3, 4)}, optional={"z": st.just(2)} + ) + + fixed_x_variable_y_maybe_z.example() + + special_dataarrays = xrst.dataarrays(dims=fixed_x_variable_y_maybe_z) + + special_dataarrays.example() + special_dataarrays.example() + +Here we have used one of hypothesis' built-in strategies ``fixed_dictionaries`` to create a strategy which generates +mappings of dimension names to lengths (i.e. the ``size`` of the xarray object we want). +This particular strategy will always generate an ``x`` dimension of length 2, and a ``y`` dimension of +length either 3 or 4, and will sometimes also generate a ``z`` dimension of length 2. 
+By feeding this strategy for dictionaries into the ``dims`` argument of xarray's ``dataarrays`` strategy, we can generate +arbitrary ``DataArray`` objects whose dimensions will always match these specifications. + Creating Duck-type Arrays ~~~~~~~~~~~~~~~~~~~~~~~~~ From 9723e454e50aafa0c1e0e174ed3b7b9e9a4bf376 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 30 Aug 2022 15:51:05 -0400 Subject: [PATCH 044/155] remove superfluous utils file --- xarray/testing/utils.py | 36 ------------------------------------ 1 file changed, 36 deletions(-) delete mode 100644 xarray/testing/utils.py diff --git a/xarray/testing/utils.py b/xarray/testing/utils.py deleted file mode 100644 index 2bd353e2116..00000000000 --- a/xarray/testing/utils.py +++ /dev/null @@ -1,36 +0,0 @@ -import warnings -from contextlib import contextmanager - - -@contextmanager -def suppress_warning(category, message=""): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=category, message=message) - - yield - - -def create_dimension_names(ndim): - return [f"dim_{n}" for n in range(ndim)] - - -def valid_dims_from_axes(dims, axes): - if axes is None: - return None - - if axes == 0 and len(dims) == 0: - return None - - if isinstance(axes, int): - return dims[axes] - - return [dims[axis] for axis in axes] - - -def valid_axes_from_dims(all_dims, dims): - if dims is None: - return None - elif isinstance(dims, list): - return [all_dims.index(dim) for dim in dims] - else: - return all_dims.index(dims) From 2e44860aeed8d25510b547dd1974b7f071e7eb17 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 30 Aug 2022 16:58:46 -0400 Subject: [PATCH 045/155] removed elements strategy --- xarray/testing/strategies.py | 34 +++++----------------------------- 1 file changed, 5 insertions(+), 29 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 1411ebf2862..2cee03ccf65 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ 
-9,7 +9,6 @@ __all__ = [ "valid_dtypes", - "np_arrays", "names", "dimension_names", "dimension_sizes", @@ -30,25 +29,11 @@ valid_dtypes.__doc__ = """Generates only those numpy dtypes which xarray can handle.""" -def elements(dtype) -> st.SearchStrategy[Any]: - """ - Generates scalar elements to go in a numpy-like array. - - Requires the hypothesis package to be installed. - """ - max_value = 100 - min_value = 0 if dtype.kind == "u" else -max_value - - return npst.from_dtype( - dtype, allow_infinity=False, min_value=min_value, max_value=max_value - ) - - -@st.composite def np_arrays( - draw: st.DrawFn, - shape: Union[Tuple[int], st.SearchStrategy[Tuple[int]]] = None, - dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = None, + shape: Union[Tuple[int], st.SearchStrategy[Tuple[int]]] = npst.array_shapes( + max_side=4 + ), + dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = valid_dtypes, ) -> st.SearchStrategy[np.ndarray]: """ Generates arbitrary numpy arrays with xarray-compatible dtypes. @@ -61,17 +46,8 @@ def np_arrays( dtype Default is to use any of the valid_dtypes defined for xarray. 
""" - if shape is None: - shape = draw(npst.array_shapes()) - elif isinstance(shape, st.SearchStrategy): - shape = draw(shape) - - if dtype is None: - dtype = draw(valid_dtypes) - elif isinstance(dtype, st.SearchStrategy): - dtype = draw(dtype) - return draw(npst.arrays(dtype=dtype, shape=shape, elements=elements(dtype))) + return npst.arrays(dtype=dtype, shape=shape) names = st.text(st.characters(), min_size=1) From 1cc073b0e3f1888f80db7e7ab6bcabd6012a5b16 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 30 Aug 2022 16:59:38 -0400 Subject: [PATCH 046/155] removed np_arrays strategy from public API --- doc/api.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index 189c5285b8f..aba048453ef 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1072,7 +1072,6 @@ Hypothesis Testing Strategies :toctree: generated/ testing.strategies.valid_dtypes - testing.strategies.np_arrays testing.strategies.names testing.strategies.dimension_names testing.strategies.dimension_sizes From 603e6bbf99f585b3fd10de71f3f18f513e6ca24d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 30 Aug 2022 17:04:32 -0400 Subject: [PATCH 047/155] min_ndims -> min_dims --- doc/user-guide/testing.rst | 2 +- xarray/testing/strategies.py | 32 ++++++++++++++++---------------- xarray/tests/test_strategies.py | 10 +++++----- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 00107eeab74..3a56e6b229b 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -120,7 +120,7 @@ For example you could create a ``chunks`` strategy to specify particular chunkin @given(st.data()) def test_something_else_inefficiently(data): - arrs = xrst.np_arrays() # generates arrays of any shape + arrs = npst.arrays(dtype=) # generates arrays of any shape dims = xrst.dimension_names() # generates lists of any number of dimensions # Drawing examples from this strategy is likely to have poor performance 
diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 2cee03ccf65..63c176daa2e 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -55,8 +55,8 @@ def np_arrays( def dimension_names( - min_ndims: int = 0, - max_ndims: int = 3, + min_dims: int = 0, + max_dims: int = 3, ) -> st.SearchStrategy[List[str]]: """ Generates an arbitrary list of valid dimension names. @@ -65,23 +65,23 @@ def dimension_names( Parameters ---------- - min_ndims + min_dims Minimum number of dimensions in generated list. - max_ndims + max_dims Maximum number of dimensions in generated list. """ return st.lists( elements=names, - min_size=min_ndims, - max_size=max_ndims, + min_size=min_dims, + max_size=max_dims, unique=True, ) def dimension_sizes( - min_ndims: int = 0, - max_ndims: int = 3, + min_dims: int = 0, + max_dims: int = 3, min_length: int = 1, max_length: int = None, ) -> st.SearchStrategy[Mapping[str, int]]: @@ -92,10 +92,10 @@ def dimension_sizes( Parameters ---------- - min_ndims: int, optional + min_dims: int, optional Minimum number of dimensions in generated list. Default is 1. - max_ndims: int, optional + max_dims: int, optional Maximum number of dimensions in generated list. Default is 3. 
min_length: int, optional @@ -112,8 +112,8 @@ def dimension_sizes( return st.dictionaries( keys=names, values=st.integers(min_value=min_length, max_value=max_length), - min_size=min_ndims, - max_size=max_ndims, + min_size=min_dims, + max_size=max_dims, ) @@ -166,7 +166,7 @@ def variables( if data is not None and dims is None: # no dims -> generate dims to match data data = draw(data) - dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + dims = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) elif dims is not None and data is None: # no data -> generate data to match dims @@ -198,7 +198,7 @@ def variables( else: # nothing provided, so generate everything consistently by drawing dims to match data data = draw(np_arrays()) - dims = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + dims = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) if isinstance(attrs, st.SearchStrategy): attrs = draw(attrs) @@ -333,7 +333,7 @@ def dataarrays( if data is not None and dims is None: # no dims -> generate dims to match data data = draw(data) - dim_names = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + dim_names = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) dim_sizes = {n: l for n, l in zip(dim_names, data.shape)} coords = draw(coordinate_variables(dim_sizes=dim_sizes)) @@ -373,7 +373,7 @@ def dataarrays( else: # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both data = draw(np_arrays()) - dim_names = draw(dimension_names(min_ndims=data.ndim, max_ndims=data.ndim)) + dim_names = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) dim_sizes = {n: l for n, l in zip(dim_names, data.shape)} coords = draw(coordinate_variables(dim_sizes=dim_sizes)) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 7ac43253ef6..4ffece16d4c 100644 --- a/xarray/tests/test_strategies.py +++ 
b/xarray/tests/test_strategies.py @@ -61,7 +61,7 @@ def test_types(self, dims): def test_unique(self, dims): assert len(set(dims)) == len(dims) - @given(dimension_names(min_ndims=3, max_ndims=3)) + @given(dimension_names(min_dims=3, max_dims=3)) def test_fixed_number_of_dims(self, dims): assert isinstance(dims, list) assert len(dims) == 3 @@ -75,7 +75,7 @@ def test_types(self, dims): assert isinstance(d, str) assert isinstance(n, int) - @given(dimension_sizes(min_ndims=3, max_ndims=3)) + @given(dimension_sizes(min_dims=3, max_dims=3)) def test_fixed_number_of_dims(self, dims): assert isinstance(dims, dict) assert len(dims) == 3 @@ -98,7 +98,7 @@ def test_given_fixed_dims_list_and_fixed_data(self, data): @given(st.data()) def test_given_arbitrary_dims_list_and_arbitrary_data(self, data): arrs = np_arrays(shape=(2, 3)) - dims = dimension_names(min_ndims=2) + dims = dimension_names(min_dims=2) var = data.draw(variables(data=arrs, dims=dims)) assert var.shape == (2, 3) @@ -127,7 +127,7 @@ def test_given_fixed_dims_list(self, data): @given(st.data()) def test_given_arbitrary_dims_list(self, data): - dims = dimension_names(min_ndims=1, max_ndims=1) + dims = dimension_names(min_dims=1, max_dims=1) var = data.draw(variables(dims=dims)) assert len(list(var.dims)) == 1 @@ -202,7 +202,7 @@ def test_given_data(self, data): @given(st.data()) def test_given_data_and_dims(self, data): arrs = np_arrays(shape=(2, 3)) - dims = dimension_names(min_ndims=2) + dims = dimension_names(min_dims=2) da = data.draw(dataarrays(data=arrs, dims=dims)) assert da.shape == (2, 3) From 63bb36254377d40bfeaf835f6179f0a06dc0e70e Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 31 Aug 2022 15:22:17 -0400 Subject: [PATCH 048/155] forbid non-matching dims and data completely --- doc/user-guide/testing.rst | 24 +++++++++----------- xarray/testing/strategies.py | 40 ++++++++++++++++++++++++++++----- xarray/tests/test_strategies.py | 16 ++++++++----- 3 files changed, 56 insertions(+), 24 
deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 3a56e6b229b..f638628d113 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -106,33 +106,29 @@ This also works with custom strategies, or strategies defined in other packages. For example you could create a ``chunks`` strategy to specify particular chunking patterns for a dask-backed array. .. warning:: - Passing multiple different strategies to the same constructor can lead to poor example generation performance. + When passing multiple different strategies to the same constructor the drawn examples must be mutually compatible. - This is because in order to construct a valid xarray object to return, our strategies must check that the - variables / dimensions / coordinates are mutually compatible. We do this using ``hypothesis.assume``, which throws - away any generated examples not meeting the required condition. - - Therefore if you pass multiple custom strategies to a strategy constructor which are not compatible in enough cases, - most of the examples they generate will be mutually incompatible. This will likely lead to poor example generation - performance, manifesting as a ``hypothesis.errors.FailedHealthCheck`` being raised. For example: + In order to construct a valid xarray object to return, our strategies must check that the + variables / dimensions / coordinates are mutually compatible. If you pass multiple custom strategies to a strategy + constructor which are not compatible in all cases, an error will be raised, *even if they are still compatible in + other cases*. For example: .. 
code-block:: @given(st.data()) def test_something_else_inefficiently(data): - arrs = npst.arrays(dtype=) # generates arrays of any shape + arrs = npst.arrays(dtype=valid_dtypes) # generates arrays of any shape dims = xrst.dimension_names() # generates lists of any number of dimensions - # Drawing examples from this strategy is likely to have poor performance + # Drawing examples from this strategy will raise a hypothesis.errors.Unsatisfiable error. var = data.draw(xrst.variables(data=arrs, dims=dims)) assert ... Here we have passed custom strategies which won't often be compatible: only rarely will the array's ``ndims`` - correspond to the number of dimensions drawn. - - To avoid this problem either allow xarray's strategies to automatically generate compatible data for you, or be more - selective about cases when passing multiple custom strategies to the same constructor. + correspond to the number of dimensions drawn. We forbid arguments that are only *sometimes* compatible in order to + avoid extremely poor example generation performance (as generating invalid examples and rejecting them is + potentially unboundedly inefficient). Fixing Arguments diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 63c176daa2e..56b45f1354d 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -3,7 +3,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np -from hypothesis import assume +from hypothesis.errors import Unsatisfiable import xarray as xr @@ -152,6 +152,11 @@ def variables( data argument if given or arbitrarily generated if not. Default is to generate arbitrary dimension names for each axis in data. attrs: Strategy which generates dicts, optional + + Raises + ------ + hypothesis.errors.Unsatisfiable + If custom strategies passed try to draw examples which together cannot create a valid Variable. 
""" if any( @@ -188,12 +193,22 @@ def variables( # TODO or we could just raise in this case? if isinstance(dims, List): data = draw(data) - assume(data.ndim == len(dims)) + if data.ndim != len(dims): + raise Unsatisfiable( + f"Strategy attempting to generate data with {data.ndim} dims but {len(dims)} " + "unique dimension names. Please only pass strategies which are guaranteed to " + "draw compatible examples for data and dims." + ) else: # should be a mapping of form {dim_names: lengths} data = draw(data) shape = tuple(dims.values()) - assume(data.shape == shape) + if data.shape != shape: + raise Unsatisfiable( + f"Strategy attempting to generate data with shape {data.shape} dims but dimension " + f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " + "draw compatible examples for data and dims." + ) else: # nothing provided, so generate everything consistently by drawing dims to match data @@ -322,6 +337,11 @@ def dataarrays( name: Strategy for generating a string name, optional Default is to use the `names` strategy, or to create an unnamed DataArray. attrs: Strategy which generates dicts, optional + + Raises + ------ + hypothesis.errors.Unsatisfiable + If custom strategies passed try to draw examples which together cannot create a valid DataArray. """ if name is None: @@ -359,14 +379,24 @@ def dataarrays( if isinstance(dims, List): dim_names = dims data = draw(data) - assume(data.ndim == len(dims)) + if data.ndim != len(dims): + raise Unsatisfiable( + f"Strategy attempting to generate data with {data.ndim} dims but {len(dims)} " + "unique dimension names. Please only pass strategies which are guaranteed to " + "draw compatible examples for data and dims." 
+ ) dim_sizes = {n: l for n, l in zip(dims, data.shape)} else: # should be a mapping of form {dim_names: lengths} data = draw(data) dim_sizes = dims dim_names, shape = list(dims.keys()), tuple(dims.values()) - assume(data.shape == shape) + if data.shape != shape: + raise Unsatisfiable( + f"Strategy attempting to generate data with shape {data.shape} dims but dimension " + f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " + "draw compatible examples for data and dims." + ) coords = draw(coordinate_variables(dim_sizes=dim_sizes)) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 4ffece16d4c..25566bf95fb 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -8,6 +8,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st from hypothesis import given +from hypothesis.errors import Unsatisfiable from xarray import DataArray, Dataset from xarray.core.variable import Variable @@ -98,11 +99,14 @@ def test_given_fixed_dims_list_and_fixed_data(self, data): @given(st.data()) def test_given_arbitrary_dims_list_and_arbitrary_data(self, data): arrs = np_arrays(shape=(2, 3)) - dims = dimension_names(min_dims=2) + dims = dimension_names(min_dims=2, max_dims=2) var = data.draw(variables(data=arrs, dims=dims)) - assert var.shape == (2, 3) + dims = dimension_names(min_dims=3) + with pytest.raises(Unsatisfiable): + data.draw(variables(data=arrs, dims=dims)) + @given(st.data()) def test_given_fixed_data(self, data): arr = np.asarray([[1, 2], [3, 4]]) @@ -202,15 +206,17 @@ def test_given_data(self, data): @given(st.data()) def test_given_data_and_dims(self, data): arrs = np_arrays(shape=(2, 3)) - dims = dimension_names(min_dims=2) + dims = dimension_names(min_dims=2, max_dims=2) da = data.draw(dataarrays(data=arrs, dims=dims)) - assert da.shape == (2, 3) + dims = dimension_names(min_dims=3, max_dims=3) + with pytest.raises(Unsatisfiable): + 
data.draw(dataarrays(data=arrs, dims=dims)) + arrs = np_arrays(shape=(3, 4)) dims = st.just({"x": 3, "y": 4}) da = data.draw(dataarrays(data=arrs, dims=dims)) - assert da.sizes == {"x": 3, "y": 4} From 69ec230e3d34f24c9ddcc646dc5ca6d6f948d569 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 31 Aug 2022 17:54:00 -0400 Subject: [PATCH 049/155] simple test for data_variables strategy --- xarray/testing/strategies.py | 10 ++++++++-- xarray/tests/test_strategies.py | 14 ++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 56b45f1354d..4a99e37a842 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -420,7 +420,7 @@ def dataarrays( def data_variables( draw: st.DrawFn, dim_sizes: Mapping[str, int], - allowed_names: st.SearchStrategy[str] = None, + allowed_names: st.SearchStrategy[str] = names, ) -> st.SearchStrategy[Mapping[str, xr.Variable]]: """ Generates dicts of alignable Variable objects for use as Dataset data variables. 
@@ -492,7 +492,10 @@ def datasets( attrs: Strategy which generates dicts, optional """ - if any(arg is not None for arg in [data_vars, coords, dims, attrs]): + if coords is not None: + raise NotImplementedError() + + if any(arg is not None for arg in [data_vars, dims]): raise NotImplementedError() else: # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both @@ -517,4 +520,7 @@ def datasets( else: data_vars = {} + if attrs is not None: + raise NotImplementedError() + return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 25566bf95fb..9c6acfe089a 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -220,6 +220,20 @@ def test_given_data_and_dims(self, data): assert da.sizes == {"x": 3, "y": 4} +class TestDataVariablesStrategy: + @given(st.data()) + def test_given_only_sizes(self, data): + dim_sizes = {"x": 2, "y": 3} + data_vars = data.draw(data_variables(dim_sizes=dim_sizes)) + for k, v in data_vars.items(): + assert isinstance(v, Variable) + assert set(v.sizes.items()).issubset(set(dim_sizes.items())) + + @given(st.data()) + def test_given_restricted_names(self, data): + ... 
+ + class TestDatasetsStrategy: @given(datasets()) def test_given_nothing(self, ds): From e5c7e230f7499483401aa91a7488340d7c55e099 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 2 Sep 2022 14:14:06 -0400 Subject: [PATCH 050/155] passing arguments to datasets strategy --- xarray/testing/strategies.py | 71 +++++++++++++++++++++++++++++++-- xarray/tests/test_strategies.py | 29 ++++++++++++++ 2 files changed, 97 insertions(+), 3 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 4a99e37a842..efc01d3644e 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -490,15 +490,57 @@ def datasets( data argument if given or arbitrarily generated if not. Default is to generate arbitrary dimension sizes. attrs: Strategy which generates dicts, optional + + Raises + ------ + hypothesis.errors.Unsatisfiable + If custom strategies passed try to draw examples which together cannot create a valid DataArray. """ if coords is not None: raise NotImplementedError() - if any(arg is not None for arg in [data_vars, dims]): - raise NotImplementedError() + if data_vars is not None and dims is None: + # no dims -> generate dims to match data + data_vars = draw(data_vars) + dim_sizes = _find_overall_sizes(data_vars) + # TODO only draw coordinate variables whose names don't conflict with data variables + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + + elif data_vars is None and dims is not None: + # no data -> generate data to match dims + if isinstance(dims, List): + # TODO support dims as list too? 
+ raise NotImplementedError() + else: + # should be a mapping of form {dim_names: lengths} + dim_sizes = draw(dims) + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + coord_names = list(coords.keys()) + allowed_data_var_names = names.filter(lambda n: n not in coord_names) + data_vars = draw( + data_variables( + dim_sizes=dim_sizes, allowed_names=allowed_data_var_names + ) + ) + + elif data_vars is not None and dims is not None: + # both data and dims provided -> check drawn examples are compatible + dims = draw(dims) + if isinstance(dims, List): + # TODO support dims as list too? + raise NotImplementedError() + else: + # should be a mapping of form {dim_names: lengths} + dim_sizes = dims + data_vars = draw(data_vars) + _check_compatible_sizes(data_vars, dim_sizes) + + # TODO only draw coordinate variables whose names don't conflict with data variables + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + else: - # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both + # nothing provided, so generate everything consistently by drawing data to match dims, and coords to match both dim_sizes = draw(dimension_sizes()) # Allow for no coordinate variables - helps with shrinking @@ -524,3 +566,26 @@ def datasets( raise NotImplementedError() return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) + + +def _find_overall_sizes(vars: Mapping[str, xr.Variable]) -> Mapping[str, int]: + """Given a set of variables, find their common sizes.""" + # TODO raise an error if inconsistent (i.e. if different values appear under same key) + sizes_dicts = [v.sizes for v in vars.values()] + dim_sizes = {d: s for dim_sizes in sizes_dicts for d, s in dim_sizes.items()} + return dim_sizes + + +def _check_compatible_sizes( + vars: Mapping[str, xr.Variable], dim_sizes: Mapping[str, int] +): + """Check set of variables have sizes compatible with given dim_sizes. 
If not raise Unsatisfiable error.""" + + for name, v in vars.items(): + if not set(v.sizes.items()).issubset(set(dim_sizes.items())): + raise Unsatisfiable( + f"Strategy attempting to generate object with dimension sizes {dim_sizes} but drawn " + f"variable {name} has sizes {v.sizes}, which is incompatible." + "Please only pass strategies which are guaranteed to draw compatible examples for data " + "and dims." + ) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 9c6acfe089a..068e54cb65b 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -238,3 +238,32 @@ class TestDatasetsStrategy: @given(datasets()) def test_given_nothing(self, ds): assert isinstance(ds, Dataset) + + @given(st.data()) + def test_given_data(self, data): + dim_sizes = {"x": 3, "y": 4} + data_vars = data.draw(data_variables(dim_sizes=dim_sizes)) + ds = data.draw(datasets(data_vars=st.just(data_vars))) + assert set(ds.sizes.items()).issubset(set(dim_sizes.items())) + + @given(st.data()) + def test_given_dims(self, data): + dim_sizes = {"x": 3, "y": 4} + ds = data.draw(datasets(dims=st.just(dim_sizes))) + assert set(ds.sizes.items()).issubset(set(dim_sizes.items())) + + @given(st.data()) + def test_given_data_and_dims(self, data): + dim_sizes = {"x": 3, "y": 4} + data_vars = data.draw(data_variables(dim_sizes=dim_sizes)) + ds = data.draw(datasets(data_vars=st.just(data_vars), dims=st.just(dim_sizes))) + assert set(ds.sizes.items()).issubset(set(dim_sizes.items())) + + incompatible_dim_sizes = {"x": 1, "y": 4} + data_vars = {"foo": Variable(data=[0, 1, 2], dims="x")} + with pytest.raises(Unsatisfiable, match="drawn variable"): + data.draw( + datasets( + data_vars=st.just(data_vars), dims=st.just(incompatible_dim_sizes) + ) + ) From 52f2490a18082347c5bc4f09317eee4f0c0c8cf4 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 2 Sep 2022 14:25:23 -0400 Subject: [PATCH 051/155] whatsnew --- doc/whats-new.rst | 4 ++++ 1 file 
changed, 4 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9ce51e48983..40403519301 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,10 @@ v2022.07.0 (unreleased) New Features ~~~~~~~~~~~~ +- Added a suite of hypothesis strategies for generating xarray objects containing arbitrary data, useful for testing. + Accessible under :py:func:`testing.strategies`, and documented in a new page on testing in the User Guide. + (:issue:`6911`, :pull:`6908`) + By `Tom Nicholas `_. Breaking changes ~~~~~~~~~~~~~~~~ From 9b964708c8b8895ebbbf9340d891cb9aa8c98fe7 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 2 Sep 2022 14:53:41 -0400 Subject: [PATCH 052/155] add attrs strategy --- doc/api.rst | 1 + doc/user-guide/testing.rst | 1 + xarray/testing/strategies.py | 17 ++++++++++++++++- xarray/tests/test_strategies.py | 8 ++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index aba048453ef..1be613a37a4 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1075,6 +1075,7 @@ Hypothesis Testing Strategies testing.strategies.names testing.strategies.dimension_names testing.strategies.dimension_sizes + testing.strategies.attrs testing.strategies.variables testing.strategies.coordinate_variables testing.strategies.dataarrays diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index f638628d113..f4d86620c45 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -49,6 +49,7 @@ These strategies are accessible in the :py:module::`xarray.testing.strategies` m testing.strategies.names testing.strategies.dimension_names testing.strategies.dimension_sizes + testing.strategies.attrs testing.strategies.variables testing.strategies.coordinate_variables testing.strategies.dataarrays diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index efc01d3644e..8351ca058db 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -12,6 +12,7 
@@ "names", "dimension_names", "dimension_sizes", + "attrs", "variables", "coordinate_variables", "dataarrays", @@ -50,7 +51,7 @@ def np_arrays( return npst.arrays(dtype=dtype, shape=shape) -names = st.text(st.characters(), min_size=1) +names: st.SearchStrategy[str] = st.text(st.characters(), min_size=1) names.__doc__ = """Generates arbitrary string names for dimensions / variables.""" @@ -117,6 +118,20 @@ def dimension_sizes( ) +_attr_keys = st.text(st.characters()) +_attr_values = st.none() | st.booleans() | st.text(st.characters()) | np_arrays() + + +attrs: st.SearchStrategy[Mapping[str, Any]] = st.recursive( + st.dictionaries(_attr_keys, _attr_values), + lambda children: st.dictionaries(_attr_keys, children), + max_leaves=3, +) +attrs.__doc__ = ( + """Generates arbitrary valid attributes dictionaries for xarray objects.""" +) + + # Is there a way to do this in general? # Could make a Protocol... T_Array = Any diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 068e54cb65b..22d9d72491d 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -13,6 +13,7 @@ from xarray import DataArray, Dataset from xarray.core.variable import Variable from xarray.testing.strategies import ( + attrs, coordinate_variables, data_variables, dataarrays, @@ -82,6 +83,13 @@ def test_fixed_number_of_dims(self, dims): assert len(dims) == 3 +class TestAttrsStrategy: + @given(attrs) + def test_type(self, attrs): + assert isinstance(attrs, dict) + # TODO how to test the types of values in a recursive object? 
+ + class TestVariablesStrategy: @given(variables()) def test_given_nothing(self, var): From 41fe0b489d0e189187c57a140b85a3224fc3d181 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 2 Sep 2022 15:00:23 -0400 Subject: [PATCH 053/155] autogenerate attrs for all objects --- xarray/testing/strategies.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 8351ca058db..8b0b5f45848 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -144,7 +144,7 @@ def variables( dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, - attrs: st.SearchStrategy[Mapping] = None, + attrs: st.SearchStrategy[Mapping] = attrs, ) -> st.SearchStrategy[xr.Variable]: """ Generates arbitrary xarray.Variable objects. @@ -230,13 +230,7 @@ def variables( data = draw(np_arrays()) dims = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) - if isinstance(attrs, st.SearchStrategy): - attrs = draw(attrs) - elif attrs is None: - # TODO autogenerate some attributes - ... - - return xr.Variable(dims=dims, data=data, attrs=attrs) + return xr.Variable(dims=dims, data=data, attrs=draw(attrs)) @st.composite @@ -324,7 +318,7 @@ def dataarrays( st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, name: st.SearchStrategy[Union[str, None]] = None, - attrs: st.SearchStrategy[Mapping] = None, + attrs: st.SearchStrategy[Mapping] = attrs, ) -> st.SearchStrategy[xr.DataArray]: """ Generates arbitrary xarray.DataArray objects. 
@@ -427,7 +421,7 @@ def dataarrays( coords=coords, name=name, dims=dim_names, - attrs=attrs, + attrs=draw(attrs), ) @@ -478,7 +472,7 @@ def datasets( dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, - attrs: st.SearchStrategy[Mapping] = None, + attrs: st.SearchStrategy[Mapping] = attrs, ) -> st.SearchStrategy[xr.Dataset]: """ Generates arbitrary xarray.Dataset objects. @@ -577,10 +571,7 @@ def datasets( else: data_vars = {} - if attrs is not None: - raise NotImplementedError() - - return xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs) + return xr.Dataset(data_vars=data_vars, coords=coords, attrs=draw(attrs)) def _find_overall_sizes(vars: Mapping[str, xr.Variable]) -> Mapping[str, int]: From 0e53aa10594c409ae35880df74ccb8996dd39f1d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 2 Sep 2022 15:21:04 -0400 Subject: [PATCH 054/155] attempt to make attrs strategy quicker --- xarray/testing/strategies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 8b0b5f45848..f126889bb78 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -125,7 +125,7 @@ def dimension_sizes( attrs: st.SearchStrategy[Mapping[str, Any]] = st.recursive( st.dictionaries(_attr_keys, _attr_values), lambda children: st.dictionaries(_attr_keys, children), - max_leaves=3, + max_leaves=2, ) attrs.__doc__ = ( """Generates arbitrary valid attributes dictionaries for xarray objects.""" From f659b4bdd4074e0110bb4437951594618c42ae66 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 2 Sep 2022 15:21:14 -0400 Subject: [PATCH 055/155] extend deadline --- xarray/tests/test_strategies.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 22d9d72491d..9917e0b5a25 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -7,7 
+7,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st -from hypothesis import given +from hypothesis import given, settings from hypothesis.errors import Unsatisfiable from xarray import DataArray, Dataset @@ -260,6 +260,7 @@ def test_given_dims(self, data): ds = data.draw(datasets(dims=st.just(dim_sizes))) assert set(ds.sizes.items()).issubset(set(dim_sizes.items())) + @settings(deadline=400) @given(st.data()) def test_given_data_and_dims(self, data): dim_sizes = {"x": 3, "y": 4} From d1be3ee672171d8c6f670bcea6d748cb1b427948 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 6 Sep 2022 17:31:04 -0400 Subject: [PATCH 056/155] attempt to speed up attrs strategy --- xarray/testing/strategies.py | 10 ++++++++-- xarray/tests/test_strategies.py | 1 - 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index f126889bb78..2f806f47ab1 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -119,13 +119,19 @@ def dimension_sizes( _attr_keys = st.text(st.characters()) -_attr_values = st.none() | st.booleans() | st.text(st.characters()) | np_arrays() +_small_arrays = np_arrays( + shape=npst.array_shapes( + max_side=2, + max_dims=3, + ) +) +_attr_values = st.none() | st.booleans() | st.text(st.characters()) | _small_arrays attrs: st.SearchStrategy[Mapping[str, Any]] = st.recursive( st.dictionaries(_attr_keys, _attr_values), lambda children: st.dictionaries(_attr_keys, children), - max_leaves=2, + max_leaves=5, ) attrs.__doc__ = ( """Generates arbitrary valid attributes dictionaries for xarray objects.""" diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 9917e0b5a25..17ffa7dc60e 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -260,7 +260,6 @@ def test_given_dims(self, data): ds = data.draw(datasets(dims=st.just(dim_sizes))) assert 
set(ds.sizes.items()).issubset(set(dim_sizes.items())) - @settings(deadline=400) @given(st.data()) def test_given_data_and_dims(self, data): dim_sizes = {"x": 3, "y": 4} From e88f5f0552e69e2b5d3dd17f054db518cdb9ce45 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 6 Sep 2022 22:37:37 -0400 Subject: [PATCH 057/155] promote all strategies to be functions --- xarray/testing/strategies.py | 60 +++++++++++++++++---------------- xarray/tests/test_strategies.py | 6 ++-- 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 2f806f47ab1..3224a7e6205 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -20,21 +20,24 @@ "datasets", ] + # required to exclude weirder dtypes e.g. unicode, byte_string, array, or nested dtypes. -valid_dtypes: st.SearchStrategy[np.dtype] = ( - npst.integer_dtypes() - | npst.unsigned_integer_dtypes() - | npst.floating_dtypes() - | npst.complex_number_dtypes() -) -valid_dtypes.__doc__ = """Generates only those numpy dtypes which xarray can handle.""" +def valid_dtypes() -> st.SearchStrategy[np.dtype]: + """Generates only those numpy dtypes which xarray can handle.""" + + return ( + npst.integer_dtypes() + | npst.unsigned_integer_dtypes() + | npst.floating_dtypes() + | npst.complex_number_dtypes() + ) def np_arrays( shape: Union[Tuple[int], st.SearchStrategy[Tuple[int]]] = npst.array_shapes( max_side=4 ), - dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = valid_dtypes, + dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = valid_dtypes(), ) -> st.SearchStrategy[np.ndarray]: """ Generates arbitrary numpy arrays with xarray-compatible dtypes. 
@@ -51,8 +54,9 @@ def np_arrays( return npst.arrays(dtype=dtype, shape=shape) -names: st.SearchStrategy[str] = st.text(st.characters(), min_size=1) -names.__doc__ = """Generates arbitrary string names for dimensions / variables.""" +def names() -> st.SearchStrategy[str]: + """Generates arbitrary string names for dimensions / variables.""" + return st.text(st.characters(), min_size=1) def dimension_names( @@ -73,7 +77,7 @@ def dimension_names( """ return st.lists( - elements=names, + elements=names(), min_size=min_dims, max_size=max_dims, unique=True, @@ -111,7 +115,7 @@ def dimension_sizes( max_length = min_length + 5 return st.dictionaries( - keys=names, + keys=names(), values=st.integers(min_value=min_length, max_value=max_length), min_size=min_dims, max_size=max_dims, @@ -128,14 +132,13 @@ def dimension_sizes( _attr_values = st.none() | st.booleans() | st.text(st.characters()) | _small_arrays -attrs: st.SearchStrategy[Mapping[str, Any]] = st.recursive( - st.dictionaries(_attr_keys, _attr_values), - lambda children: st.dictionaries(_attr_keys, children), - max_leaves=5, -) -attrs.__doc__ = ( +def attrs() -> st.SearchStrategy[Mapping[str, Any]]: """Generates arbitrary valid attributes dictionaries for xarray objects.""" -) + return st.recursive( + st.dictionaries(_attr_keys, _attr_values), + lambda children: st.dictionaries(_attr_keys, children), + max_leaves=3, + ) # Is there a way to do this in general? @@ -150,7 +153,7 @@ def variables( dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, - attrs: st.SearchStrategy[Mapping] = attrs, + attrs: st.SearchStrategy[Mapping] = attrs(), ) -> st.SearchStrategy[xr.Variable]: """ Generates arbitrary xarray.Variable objects. 
@@ -299,7 +302,7 @@ def coordinate_variables( non_dim_coord_vars = draw(_alignable_variables(dim_sizes=dim_sizes)) # can't have same name as a dimension - valid_non_dim_coord_names = names.filter(lambda n: n not in dim_names) + valid_non_dim_coord_names = names().filter(lambda n: n not in dim_names) non_dim_coord_names = draw( st.lists( valid_non_dim_coord_names, @@ -323,8 +326,8 @@ def dataarrays( dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, - name: st.SearchStrategy[Union[str, None]] = None, - attrs: st.SearchStrategy[Mapping] = attrs, + name: st.SearchStrategy[Union[str, None]] = names(), + attrs: st.SearchStrategy[Mapping] = attrs(), ) -> st.SearchStrategy[xr.DataArray]: """ Generates arbitrary xarray.DataArray objects. @@ -359,8 +362,7 @@ def dataarrays( If custom strategies passed try to draw examples which together cannot create a valid DataArray. """ - if name is None: - name = draw(st.none() | names) + name = draw(st.none() | name) if coords is not None: raise NotImplementedError() @@ -435,7 +437,7 @@ def dataarrays( def data_variables( draw: st.DrawFn, dim_sizes: Mapping[str, int], - allowed_names: st.SearchStrategy[str] = names, + allowed_names: st.SearchStrategy[str] = names(), ) -> st.SearchStrategy[Mapping[str, xr.Variable]]: """ Generates dicts of alignable Variable objects for use as Dataset data variables. @@ -478,7 +480,7 @@ def datasets( dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, - attrs: st.SearchStrategy[Mapping] = attrs, + attrs: st.SearchStrategy[Mapping] = attrs(), ) -> st.SearchStrategy[xr.Dataset]: """ Generates arbitrary xarray.Dataset objects. 
@@ -532,7 +534,7 @@ def datasets( dim_sizes = draw(dims) coords = draw(coordinate_variables(dim_sizes=dim_sizes)) coord_names = list(coords.keys()) - allowed_data_var_names = names.filter(lambda n: n not in coord_names) + allowed_data_var_names = names().filter(lambda n: n not in coord_names) data_vars = draw( data_variables( dim_sizes=dim_sizes, allowed_names=allowed_data_var_names @@ -565,7 +567,7 @@ def datasets( coords = {} coord_names = list(coords.keys()) - allowed_data_var_names = names.filter(lambda n: n not in coord_names) + allowed_data_var_names = names().filter(lambda n: n not in coord_names) # Allow for no data variables - helps with shrinking if draw(st.booleans()): diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 17ffa7dc60e..4b5b207babd 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -7,7 +7,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st -from hypothesis import given, settings +from hypothesis import given from hypothesis.errors import Unsatisfiable from xarray import DataArray, Dataset @@ -37,7 +37,7 @@ def test_fixed_dtype(self, arr): @given(st.data()) def test_arbitrary_valid_dtype(self, data): - valid_dtype = data.draw(valid_dtypes) + valid_dtype = data.draw(valid_dtypes()) arr = data.draw(np_arrays(dtype=valid_dtype)) assert arr.dtype == valid_dtype @@ -84,7 +84,7 @@ def test_fixed_number_of_dims(self, dims): class TestAttrsStrategy: - @given(attrs) + @given(attrs()) def test_type(self, attrs): assert isinstance(attrs, dict) # TODO how to test the types of values in a recursive object? 
From 4b888875d84ff62875a3598466ef56be692932f9 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 6 Sep 2022 22:41:09 -0400 Subject: [PATCH 058/155] valid_dtypes -> numeric_dtypes --- doc/api.rst | 2 +- doc/user-guide/testing.rst | 4 ++-- xarray/testing/strategies.py | 8 ++++---- xarray/tests/test_strategies.py | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 1be613a37a4..1a7c97cff77 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1071,7 +1071,7 @@ Hypothesis Testing Strategies .. autosummary:: :toctree: generated/ - testing.strategies.valid_dtypes + testing.strategies.numeric_dtypes testing.strategies.names testing.strategies.dimension_names testing.strategies.dimension_sizes diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index f4d86620c45..5eeaf7d3eb8 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -44,7 +44,7 @@ These strategies are accessible in the :py:module::`xarray.testing.strategies` m .. autosummary:: - testing.strategies.valid_dtypes + testing.strategies.numeric_dtypes testing.strategies.np_arrays testing.strategies.names testing.strategies.dimension_names @@ -118,7 +118,7 @@ For example you could create a ``chunks`` strategy to specify particular chunkin @given(st.data()) def test_something_else_inefficiently(data): - arrs = npst.arrays(dtype=valid_dtypes) # generates arrays of any shape + arrs = npst.arrays(dtype=numeric_dtypes) # generates arrays of any shape dims = xrst.dimension_names() # generates lists of any number of dimensions # Drawing examples from this strategy will raise a hypothesis.errors.Unsatisfiable error. 
diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 3224a7e6205..b520f6d5c96 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -8,7 +8,7 @@ import xarray as xr __all__ = [ - "valid_dtypes", + "numeric_dtypes", "names", "dimension_names", "dimension_sizes", @@ -22,7 +22,7 @@ # required to exclude weirder dtypes e.g. unicode, byte_string, array, or nested dtypes. -def valid_dtypes() -> st.SearchStrategy[np.dtype]: +def numeric_dtypes() -> st.SearchStrategy[np.dtype]: """Generates only those numpy dtypes which xarray can handle.""" return ( @@ -37,7 +37,7 @@ def np_arrays( shape: Union[Tuple[int], st.SearchStrategy[Tuple[int]]] = npst.array_shapes( max_side=4 ), - dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = valid_dtypes(), + dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = numeric_dtypes(), ) -> st.SearchStrategy[np.ndarray]: """ Generates arbitrary numpy arrays with xarray-compatible dtypes. @@ -48,7 +48,7 @@ def np_arrays( ---------- shape dtype - Default is to use any of the valid_dtypes defined for xarray. + Default is to use any of the numeric_dtypes defined for xarray. 
""" return npst.arrays(dtype=dtype, shape=shape) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 4b5b207babd..1f339bbd19b 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -21,7 +21,7 @@ dimension_names, dimension_sizes, np_arrays, - valid_dtypes, + numeric_dtypes, variables, ) @@ -37,7 +37,7 @@ def test_fixed_dtype(self, arr): @given(st.data()) def test_arbitrary_valid_dtype(self, data): - valid_dtype = data.draw(valid_dtypes()) + valid_dtype = data.draw(numeric_dtypes()) arr = data.draw(np_arrays(dtype=valid_dtype)) assert arr.dtype == valid_dtype From 2a1dc662ff84bc784128fa5de37fa41919c8f880 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 6 Sep 2022 22:44:22 -0400 Subject: [PATCH 059/155] changed hypothesis error type --- xarray/testing/strategies.py | 22 +++++++++++----------- xarray/tests/test_strategies.py | 8 ++++---- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index b520f6d5c96..27347de90c2 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -3,7 +3,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np -from hypothesis.errors import Unsatisfiable +from hypothesis.errors import InvalidArgument import xarray as xr @@ -179,7 +179,7 @@ def variables( Raises ------ - hypothesis.errors.Unsatisfiable + hypothesis.errors.InvalidArgument If custom strategies passed try to draw examples which together cannot create a valid Variable. """ @@ -213,12 +213,12 @@ def variables( dims = draw(dims) # TODO is there another way to enforce these assumptions? This is very like to fail hypothesis' health checks - # TODO how do I write a test that checks that the hypothesis Unsatisfiable error will be raised? + # TODO how do I write a test that checks that the hypothesis InvalidArgument error will be raised? # TODO or we could just raise in this case? 
if isinstance(dims, List): data = draw(data) if data.ndim != len(dims): - raise Unsatisfiable( + raise InvalidArgument( f"Strategy attempting to generate data with {data.ndim} dims but {len(dims)} " "unique dimension names. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." @@ -228,7 +228,7 @@ def variables( data = draw(data) shape = tuple(dims.values()) if data.shape != shape: - raise Unsatisfiable( + raise InvalidArgument( f"Strategy attempting to generate data with shape {data.shape} dims but dimension " f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." @@ -358,7 +358,7 @@ def dataarrays( Raises ------ - hypothesis.errors.Unsatisfiable + hypothesis.errors.InvalidArgument If custom strategies passed try to draw examples which together cannot create a valid DataArray. """ @@ -397,7 +397,7 @@ def dataarrays( dim_names = dims data = draw(data) if data.ndim != len(dims): - raise Unsatisfiable( + raise InvalidArgument( f"Strategy attempting to generate data with {data.ndim} dims but {len(dims)} " "unique dimension names. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." @@ -409,7 +409,7 @@ def dataarrays( dim_sizes = dims dim_names, shape = list(dims.keys()), tuple(dims.values()) if data.shape != shape: - raise Unsatisfiable( + raise InvalidArgument( f"Strategy attempting to generate data with shape {data.shape} dims but dimension " f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." @@ -510,7 +510,7 @@ def datasets( Raises ------ - hypothesis.errors.Unsatisfiable + hypothesis.errors.InvalidArgument If custom strategies passed try to draw examples which together cannot create a valid DataArray. 
""" @@ -593,11 +593,11 @@ def _find_overall_sizes(vars: Mapping[str, xr.Variable]) -> Mapping[str, int]: def _check_compatible_sizes( vars: Mapping[str, xr.Variable], dim_sizes: Mapping[str, int] ): - """Check set of variables have sizes compatible with given dim_sizes. If not raise Unsatisfiable error.""" + """Check set of variables have sizes compatible with given dim_sizes. If not raise InvalidArgument error.""" for name, v in vars.items(): if not set(v.sizes.items()).issubset(set(dim_sizes.items())): - raise Unsatisfiable( + raise InvalidArgument( f"Strategy attempting to generate object with dimension sizes {dim_sizes} but drawn " f"variable {name} has sizes {v.sizes}, which is incompatible." "Please only pass strategies which are guaranteed to draw compatible examples for data " diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 1f339bbd19b..8377a259db3 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -8,7 +8,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st from hypothesis import given -from hypothesis.errors import Unsatisfiable +from hypothesis.errors import InvalidArgument from xarray import DataArray, Dataset from xarray.core.variable import Variable @@ -112,7 +112,7 @@ def test_given_arbitrary_dims_list_and_arbitrary_data(self, data): assert var.shape == (2, 3) dims = dimension_names(min_dims=3) - with pytest.raises(Unsatisfiable): + with pytest.raises(InvalidArgument): data.draw(variables(data=arrs, dims=dims)) @given(st.data()) @@ -219,7 +219,7 @@ def test_given_data_and_dims(self, data): assert da.shape == (2, 3) dims = dimension_names(min_dims=3, max_dims=3) - with pytest.raises(Unsatisfiable): + with pytest.raises(InvalidArgument): data.draw(dataarrays(data=arrs, dims=dims)) arrs = np_arrays(shape=(3, 4)) @@ -269,7 +269,7 @@ def test_given_data_and_dims(self, data): incompatible_dim_sizes = {"x": 1, "y": 4} data_vars = {"foo": Variable(data=[0, 1, 2], 
dims="x")} - with pytest.raises(Unsatisfiable, match="drawn variable"): + with pytest.raises(InvalidArgument, match="drawn variable"): data.draw( datasets( data_vars=st.just(data_vars), dims=st.just(incompatible_dim_sizes) From 9bddcec786cc87c73afc19df26505c80a2a45f35 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 6 Sep 2022 23:14:03 -0400 Subject: [PATCH 060/155] make all strategies keyword-arg only --- xarray/testing/strategies.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 27347de90c2..f503c4159d0 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -34,6 +34,7 @@ def numeric_dtypes() -> st.SearchStrategy[np.dtype]: def np_arrays( + *, shape: Union[Tuple[int], st.SearchStrategy[Tuple[int]]] = npst.array_shapes( max_side=4 ), @@ -60,6 +61,7 @@ def names() -> st.SearchStrategy[str]: def dimension_names( + *, min_dims: int = 0, max_dims: int = 3, ) -> st.SearchStrategy[List[str]]: @@ -85,6 +87,7 @@ def dimension_names( def dimension_sizes( + *, min_dims: int = 0, max_dims: int = 3, min_length: int = 1, @@ -149,6 +152,7 @@ def attrs() -> st.SearchStrategy[Mapping[str, Any]]: @st.composite def variables( draw: st.DrawFn, + *, data: st.SearchStrategy[T_Array] = None, dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] @@ -253,6 +257,7 @@ def _unique_subset_of( @st.composite def _alignable_variables( draw: st.DrawFn, + *, dim_sizes: Mapping[str, int], ) -> st.SearchStrategy[List[xr.Variable]]: """Generates lists of variables with compatible (i.e. 
alignable) dimensions and sizes.""" @@ -266,6 +271,7 @@ def _alignable_variables( @st.composite def coordinate_variables( draw: st.DrawFn, + *, dim_sizes: Mapping[str, int], ) -> st.SearchStrategy[Mapping[str, xr.Variable]]: """ @@ -321,6 +327,7 @@ def coordinate_variables( @st.composite def dataarrays( draw: st.DrawFn, + *, data: st.SearchStrategy[T_Array] = None, coords: Mapping[str, xr.Variable] = None, dims: Union[ @@ -436,6 +443,7 @@ def dataarrays( @st.composite def data_variables( draw: st.DrawFn, + *, dim_sizes: Mapping[str, int], allowed_names: st.SearchStrategy[str] = names(), ) -> st.SearchStrategy[Mapping[str, xr.Variable]]: @@ -475,6 +483,7 @@ def data_variables( @st.composite def datasets( draw: st.DrawFn, + *, data_vars: st.SearchStrategy[Mapping[str, xr.Variable]] = None, coords: Mapping[str, xr.Variable] = None, dims: Union[ From b2887d43b92caf552bfe3e4cd590eef9df61fdcb Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 6 Sep 2022 23:21:46 -0400 Subject: [PATCH 061/155] min_length -> min_side --- xarray/testing/strategies.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index f503c4159d0..f29b4075bc4 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -90,8 +90,8 @@ def dimension_sizes( *, min_dims: int = 0, max_dims: int = 3, - min_length: int = 1, - max_length: int = None, + min_side: int = 1, + max_side: int = None, ) -> st.SearchStrategy[Mapping[str, int]]: """ Generates an arbitrary mapping from dimension names to lengths. @@ -106,20 +106,20 @@ def dimension_sizes( max_dims: int, optional Maximum number of dimensions in generated list. Default is 3. - min_length: int, optional + min_side: int, optional Minimum size of a dimension. Default is 1. - max_length: int, optional + max_side: int, optional Minimum size of a dimension. Default is `min_length` + 5. 
""" - if max_length is None: - max_length = min_length + 5 + if max_side is None: + max_side = min_side + 5 return st.dictionaries( keys=names(), - values=st.integers(min_value=min_length, max_value=max_length), + values=st.integers(min_value=min_side, max_value=max_side), min_size=min_dims, max_size=max_dims, ) From 3b8e8aec5e82b217c9cbdd045e571227a2e69abe Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Tue, 6 Sep 2022 23:21:58 -0400 Subject: [PATCH 062/155] correct error type --- doc/user-guide/testing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 5eeaf7d3eb8..69fc578023a 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -121,7 +121,7 @@ For example you could create a ``chunks`` strategy to specify particular chunkin arrs = npst.arrays(dtype=numeric_dtypes) # generates arrays of any shape dims = xrst.dimension_names() # generates lists of any number of dimensions - # Drawing examples from this strategy will raise a hypothesis.errors.Unsatisfiable error. + # Drawing examples from this strategy will raise a hypothesis.errors.InvalidArgument error. var = data.draw(xrst.variables(data=arrs, dims=dims)) assert ... 
From 0980061a522a4bad10d1c8071cf6cfc73cd899f3 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 01:02:27 -0400 Subject: [PATCH 063/155] remove coords kwarg --- xarray/testing/strategies.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index f29b4075bc4..e3b3c54f354 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -329,7 +329,6 @@ def dataarrays( draw: st.DrawFn, *, data: st.SearchStrategy[T_Array] = None, - coords: Mapping[str, xr.Variable] = None, dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, @@ -350,9 +349,6 @@ def dataarrays( ---------- data: Strategy generating array-likes, optional Default is to generate numpy data of arbitrary shape, values and dtypes. - coords: Strategy generating mappings from coordinate names to xr.Variables objects, optional - Default is to generate an arbitrary combination of both dimension and non-dimension coordinates, - with sizes matching data and/or dims, but arbitrary names, dtypes, and values. dims: Strategy for generating the dimensions, optional Can either be a strategy for generating a list of string dimension names, or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. @@ -371,8 +367,7 @@ def dataarrays( name = draw(st.none() | name) - if coords is not None: - raise NotImplementedError() + # TODO add a coords argument? if data is not None and dims is None: # no dims -> generate dims to match data @@ -485,7 +480,6 @@ def datasets( draw: st.DrawFn, *, data_vars: st.SearchStrategy[Mapping[str, xr.Variable]] = None, - coords: Mapping[str, xr.Variable] = None, dims: Union[ st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] ] = None, @@ -495,7 +489,7 @@ def datasets( Generates arbitrary xarray.Dataset objects. 
Follows the basic signature of the xarray.Dataset constructor, but you can also pass alternative strategies to - generate either numpy-like array data variables, dimensions, or coordinates. + generate either numpy-like array data variables or dimensions. Passing nothing will generate a completely arbitrary Dataset (backed by numpy arrays). @@ -506,9 +500,6 @@ def datasets( data_vars: Strategy generating mappings from variable names to xr.Variable objects, optional Default is to generate an arbitrary combination of compatible variables with sizes matching dims, but arbitrary names, dtypes, and values. - coords: Strategy generating mappings from coordinate names to xr.Variable objects, optional - Default is to generate an arbitrary combination of both dimension and non-dimension coordinates, - with sizes matching data_vars and/or dims, but arbitrary names, dtypes, and values. dims: Strategy for generating the dimensions, optional Can either be a strategy for generating a list of string dimension names, or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. @@ -523,8 +514,7 @@ def datasets( If custom strategies passed try to draw examples which together cannot create a valid DataArray. """ - if coords is not None: - raise NotImplementedError() + # TODO add a coords argument? 
if data_vars is not None and dims is None: # no dims -> generate dims to match data From 0313b3e47f99892dd671b84373a884bbed4be152 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 10:29:25 -0400 Subject: [PATCH 064/155] test different types of coordinates are sometimes generated --- xarray/tests/test_strategies.py | 46 +++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 8377a259db3..bb68632937c 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -1,3 +1,5 @@ +import contextlib + import numpy as np import numpy.testing as npt import pytest @@ -7,7 +9,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st -from hypothesis import given +from hypothesis import Phase, given, settings from hypothesis.errors import InvalidArgument from xarray import DataArray, Dataset @@ -181,13 +183,41 @@ def test_valid_set_of_coords(self, data): da = DataArray(data=arr, coords=coord_vars, dims=["x", "y"]) assert isinstance(da, DataArray) - def test_generates_1d_dim_coords(self): - # TODO having a `hypothesis.find(strat, predicate)` function would be very useful here - # see https://github.com/HypothesisWorks/hypothesis/issues/3436#issuecomment-1212369645 - ... - - def test_generates_non_dim_coords(self): - ... 
+ def test_sometimes_generates_1d_dim_coords(self): + found_one = False + + @given(st.data()) + @settings(phases=[Phase.generate]) + def inner(data): + coord_vars = data.draw(coordinate_variables(dim_sizes={"x": 2, "y": 3})) + for name, var in coord_vars.items(): + if var.ndim == 1 and name == var.dims[0]: + nonlocal found_one + found_one = True + raise AssertionError # early stopping - test is correct but slower without this + + with contextlib.suppress(AssertionError): + inner() + + assert found_one + + def test_sometimes_generates_non_dim_coords(self): + found_one = False + + @given(st.data()) + @settings(phases=[Phase.generate]) + def inner(data): + coord_vars = data.draw(coordinate_variables(dim_sizes={"x": 2, "y": 3})) + for name, var in coord_vars.items(): + if var.ndim != 1 or (var.ndim == 1 and name != var.dims[0]): + nonlocal found_one + found_one = True + raise AssertionError # early stopping - test is correct but slower without this + + with contextlib.suppress(AssertionError): + inner() + + assert found_one class TestDataArraysStrategy: From e6ebb1fbe6c7077c0fb9e6cf45b616132f40a88a Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 7 Sep 2022 10:33:01 -0400 Subject: [PATCH 065/155] zip dict Co-authored-by: Zac Hatfield-Dodds --- xarray/testing/strategies.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index e3b3c54f354..4bfe1c197c3 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -471,8 +471,7 @@ def data_variables( ) ) - data_vars = {n: v for n, v in zip(var_names, vars)} - return data_vars + return dict(zip(var_names, vars)) @st.composite From 4da8772f5386e2b5fa7ded2330a4590f277719ab Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 11:29:44 -0400 Subject: [PATCH 066/155] add dim_names kwarg to dimension_sizes strategy --- xarray/testing/strategies.py | 26 ++++++++++++++++++++++---- xarray/tests/test_strategies.py | 
7 +++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index e3b3c54f354..8b596d78d71 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -23,7 +23,11 @@ # required to exclude weirder dtypes e.g. unicode, byte_string, array, or nested dtypes. def numeric_dtypes() -> st.SearchStrategy[np.dtype]: - """Generates only those numpy dtypes which xarray can handle.""" + """ + Generates only those numpy dtypes which xarray can handle. + + Requires the hypothesis package to be installed. + """ return ( npst.integer_dtypes() @@ -56,7 +60,11 @@ def np_arrays( def names() -> st.SearchStrategy[str]: - """Generates arbitrary string names for dimensions / variables.""" + """ + Generates arbitrary string names for dimensions / variables. + + Requires the hypothesis package to be installed. + """ return st.text(st.characters(), min_size=1) @@ -88,6 +96,7 @@ def dimension_names( def dimension_sizes( *, + dim_names: st.SearchStrategy[str] = names(), min_dims: int = 0, max_dims: int = 3, min_side: int = 1, @@ -100,6 +109,9 @@ def dimension_sizes( Parameters ---------- + dim_names: strategy generating strings, optional + Strategy for generating dimension names. + Defaults to the `names` strategy. min_dims: int, optional Minimum number of dimensions in generated list. Default is 1. @@ -118,7 +130,7 @@ def dimension_sizes( max_side = min_side + 5 return st.dictionaries( - keys=names(), + keys=dim_names, values=st.integers(min_value=min_side, max_value=max_side), min_size=min_dims, max_size=max_dims, @@ -136,7 +148,13 @@ def dimension_sizes( def attrs() -> st.SearchStrategy[Mapping[str, Any]]: - """Generates arbitrary valid attributes dictionaries for xarray objects.""" + """ + Generates arbitrary valid attributes dictionaries for xarray objects. + + The generated dictionaries can potentially be recursive. + + Requires the hypothesis package to be installed. 
+ """ return st.recursive( st.dictionaries(_attr_keys, _attr_values), lambda children: st.dictionaries(_attr_keys, children), diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index bb68632937c..20c08873723 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -84,6 +84,13 @@ def test_fixed_number_of_dims(self, dims): assert isinstance(dims, dict) assert len(dims) == 3 + @given(st.data()) + def test_restrict_names(self, data): + capitalized_names = st.text(st.characters(), min_size=1).map(str.upper) + dim_sizes = data.draw(dimension_sizes(dim_names=capitalized_names)) + for d in dim_sizes.keys(): + assert d.upper() == d + class TestAttrsStrategy: @given(attrs()) From e6d7a34cf73bca1b87220d8ceed7350fb688f637 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 11:58:56 -0400 Subject: [PATCH 067/155] return a dict from _alignable_variables --- xarray/testing/strategies.py | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 8b596d78d71..3eb9d5643c8 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -276,14 +276,18 @@ def _unique_subset_of( def _alignable_variables( draw: st.DrawFn, *, + var_names: st.SearchStrategy[str], dim_sizes: Mapping[str, int], -) -> st.SearchStrategy[List[xr.Variable]]: - """Generates lists of variables with compatible (i.e. alignable) dimensions and sizes.""" +) -> st.SearchStrategy[Mapping[str, xr.Variable]]: + """ + Generates dicts of names mapping to variables with compatible (i.e. alignable) dimensions and sizes. 
+ """ alignable_dim_sizes = draw(_unique_subset_of(dim_sizes)) if dim_sizes else {} + vars = variables(dims=st.just(alignable_dim_sizes)) # TODO don't hard code max number of variables - return draw(st.lists(variables(dims=st.just(alignable_dim_sizes)), max_size=3)) + return draw(st.dictionaries(var_names, vars, max_size=3)) @st.composite @@ -323,20 +327,14 @@ def coordinate_variables( # Possibly generate ND "non-dimension coordinates" - explicit possibility not to include any helps with shrinking if draw(st.booleans()): - non_dim_coord_vars = draw(_alignable_variables(dim_sizes=dim_sizes)) # can't have same name as a dimension valid_non_dim_coord_names = names().filter(lambda n: n not in dim_names) - non_dim_coord_names = draw( - st.lists( - valid_non_dim_coord_names, - min_size=len(non_dim_coord_vars), - max_size=len(non_dim_coord_vars), - unique=True, + non_dim_coords = draw( + _alignable_variables( + var_names=valid_non_dim_coord_names, dim_sizes=dim_sizes ) ) - - non_dim_coords = {n: v for n, v in zip(non_dim_coord_names, non_dim_coord_vars)} all_coords.update(non_dim_coords) return all_coords @@ -472,24 +470,16 @@ def data_variables( allowed_names: Strategy generating strings Allowed names for data variables. Needed to avoid conflict with names of coordinate variables & dimensions. """ - # TODO these shouldn't have the same name as any dimensions or any coordinates... - vars = draw(_alignable_variables(dim_sizes=dim_sizes)) + # TODO these also shouldn't have the same name as any dimensions or any coordinates... dim_names = list(dim_sizes.keys()) # can't have same name as a dimension # TODO this is also used in coordinate_variables so refactor it out into separate function valid_var_names = allowed_names.filter(lambda n: n not in dim_names) - # TODO do I actually need to draw from st.lists for this? 
- var_names = draw( - st.lists( - valid_var_names, - min_size=len(vars), - max_size=len(vars), - unique=True, - ) + data_vars = draw( + _alignable_variables(var_names=valid_var_names, dim_sizes=dim_sizes) ) - data_vars = {n: v for n, v in zip(var_names, vars)} return data_vars From 15812fdc1337ed4f05f4769ebbe54a0cddd0eba3 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 13:12:49 -0400 Subject: [PATCH 068/155] add coord_names arg to coordinate_variables strategy --- xarray/testing/strategies.py | 33 ++++++++++++++++++++------------- xarray/tests/test_strategies.py | 25 +++++++++++++++++++++---- 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 4fc787d3007..39bdb463bed 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -295,6 +295,7 @@ def coordinate_variables( draw: st.DrawFn, *, dim_sizes: Mapping[str, int], + coord_names: st.SearchStrategy[str] = names(), ) -> st.SearchStrategy[Mapping[str, xr.Variable]]: """ Generates dicts of alignable Variable objects for use as coordinates. @@ -308,6 +309,8 @@ def coordinate_variables( ---------- dim_sizes: Mapping of str to int Sizes of dimensions to use for coordinates. + coord_names: Strategy generating strings, optional + Allowed names for non-dimension coordinates. Defaults to `names` strategy. 
""" dim_names = list(dim_sizes.keys()) @@ -329,7 +332,7 @@ def coordinate_variables( if draw(st.booleans()): # can't have same name as a dimension - valid_non_dim_coord_names = names().filter(lambda n: n not in dim_names) + valid_non_dim_coord_names = coord_names.filter(lambda n: n not in dim_names) non_dim_coords = draw( _alignable_variables( var_names=valid_non_dim_coord_names, dim_sizes=dim_sizes @@ -456,7 +459,7 @@ def data_variables( draw: st.DrawFn, *, dim_sizes: Mapping[str, int], - allowed_names: st.SearchStrategy[str] = names(), + var_names: st.SearchStrategy[str] = names(), ) -> st.SearchStrategy[Mapping[str, xr.Variable]]: """ Generates dicts of alignable Variable objects for use as Dataset data variables. @@ -467,15 +470,14 @@ def data_variables( ---------- dim_sizes: Mapping of str to int Sizes of dimensions to use for variables. - allowed_names: Strategy generating strings + var_names: Strategy generating strings Allowed names for data variables. Needed to avoid conflict with names of coordinate variables & dimensions. """ - # TODO these also shouldn't have the same name as any dimensions or any coordinates... 
dim_names = list(dim_sizes.keys()) # can't have same name as a dimension # TODO this is also used in coordinate_variables so refactor it out into separate function - valid_var_names = allowed_names.filter(lambda n: n not in dim_names) + valid_var_names = var_names.filter(lambda n: n not in dim_names) data_vars = draw( _alignable_variables(var_names=valid_var_names, dim_sizes=dim_sizes) ) @@ -527,8 +529,11 @@ def datasets( # no dims -> generate dims to match data data_vars = draw(data_vars) dim_sizes = _find_overall_sizes(data_vars) - # TODO only draw coordinate variables whose names don't conflict with data variables - coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + # only draw coordinate variables whose names don't conflict with data variables + allowed_coord_names = names().filter(lambda n: n not in list(data_vars.keys())) + coords = draw( + coordinate_variables(coord_names=allowed_coord_names, dim_sizes=dim_sizes) + ) elif data_vars is None and dims is not None: # no data -> generate data to match dims @@ -543,7 +548,7 @@ def datasets( allowed_data_var_names = names().filter(lambda n: n not in coord_names) data_vars = draw( data_variables( - dim_sizes=dim_sizes, allowed_names=allowed_data_var_names + dim_sizes=dim_sizes, var_names=allowed_data_var_names ) ) @@ -559,8 +564,11 @@ def datasets( data_vars = draw(data_vars) _check_compatible_sizes(data_vars, dim_sizes) - # TODO only draw coordinate variables whose names don't conflict with data variables - coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + # only draw coordinate variables whose names don't conflict with data variables + allowed_coord_names = names().filter(lambda n: n not in list(data_vars.keys())) + coords = draw( + coordinate_variables(coord_names=allowed_coord_names, dim_sizes=dim_sizes) + ) else: # nothing provided, so generate everything consistently by drawing data to match dims, and coords to match both @@ -572,14 +580,13 @@ def datasets( else: coords = {} - coord_names = 
list(coords.keys()) - allowed_data_var_names = names().filter(lambda n: n not in coord_names) + allowed_data_var_names = names().filter(lambda n: n not in list(coords.keys())) # Allow for no data variables - helps with shrinking if draw(st.booleans()): draw( data_variables( - dim_sizes=dim_sizes, allowed_names=allowed_data_var_names + dim_sizes=dim_sizes, var_names=allowed_data_var_names ) ) else: diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 20c08873723..5ea220db719 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -88,8 +88,8 @@ def test_fixed_number_of_dims(self, dims): def test_restrict_names(self, data): capitalized_names = st.text(st.characters(), min_size=1).map(str.upper) dim_sizes = data.draw(dimension_sizes(dim_names=capitalized_names)) - for d in dim_sizes.keys(): - assert d.upper() == d + for dim in dim_sizes.keys(): + assert dim.upper() == dim class TestAttrsStrategy: @@ -226,6 +226,18 @@ def inner(data): assert found_one + @given(st.data()) + def test_restrict_names(self, data): + capitalized_names = st.text(st.characters(), min_size=1).map(str.upper) + coord_vars = data.draw( + coordinate_variables( + dim_sizes={"x": 2, "y": 3}, coord_names=capitalized_names + ) + ) + for name in coord_vars.keys(): + if name not in ['x', 'y']: + assert name.upper() == name + class TestDataArraysStrategy: @given(dataarrays()) @@ -275,8 +287,13 @@ def test_given_only_sizes(self, data): assert set(v.sizes.items()).issubset(set(dim_sizes.items())) @given(st.data()) - def test_given_restricted_names(self, data): - ... 
+ def test_restrict_names(self, data): + capitalized_names = st.text(st.characters(), min_size=1).map(str.upper) + data_vars = data.draw( + data_variables(dim_sizes={"x": 2, "y": 3}, var_names=capitalized_names) + ) + for name in data_vars.keys(): + assert name.upper() == name class TestDatasetsStrategy: From 4374681b1c967e59c4fa27540dcba6c7cad8d4af Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 7 Sep 2022 17:37:19 +0000 Subject: [PATCH 069/155] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/testing/strategies.py | 10 ++-------- xarray/tests/test_strategies.py | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 39bdb463bed..7634c355bec 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -547,9 +547,7 @@ def datasets( coord_names = list(coords.keys()) allowed_data_var_names = names().filter(lambda n: n not in coord_names) data_vars = draw( - data_variables( - dim_sizes=dim_sizes, var_names=allowed_data_var_names - ) + data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names) ) elif data_vars is not None and dims is not None: @@ -584,11 +582,7 @@ def datasets( # Allow for no data variables - helps with shrinking if draw(st.booleans()): - draw( - data_variables( - dim_sizes=dim_sizes, var_names=allowed_data_var_names - ) - ) + draw(data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names)) else: data_vars = {} diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 5ea220db719..b00e5a2c871 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -235,7 +235,7 @@ def test_restrict_names(self, data): ) ) for name in coord_vars.keys(): - if name not in ['x', 'y']: + if name not in ["x", "y"]: assert name.upper() == name From 
0f0c4fbf0a7c969d171e41b091e8599a3bb2c445 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 13:57:41 -0400 Subject: [PATCH 070/155] change typing of dims arg --- xarray/testing/strategies.py | 70 ++++++++++++++++----------------- xarray/tests/test_strategies.py | 2 +- 2 files changed, 35 insertions(+), 37 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 39bdb463bed..e7b4b9b176a 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,4 @@ -from typing import Any, Hashable, List, Mapping, Tuple, Union +from typing import Any, Hashable, List, Mapping, Sequence, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -172,9 +172,7 @@ def variables( draw: st.DrawFn, *, data: st.SearchStrategy[T_Array] = None, - dims: Union[ - st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] - ] = None, + dims: st.SearchStrategy[Union[Sequence[str], Mapping[str, int]]] = None, attrs: st.SearchStrategy[Mapping] = attrs(), ) -> st.SearchStrategy[xr.Variable]: """ @@ -192,7 +190,7 @@ def variables( data: Strategy generating array-likes, optional Default is to generate numpy data of arbitrary shape, values and dtype. dims: Strategy for generating the dimensions, optional - Can either be a strategy for generating a list of string dimension names, + Can either be a strategy for generating a sequence of string dimension names, or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. If provided in the former form the lengths of the returned Variable will either be determined from the data argument if given or arbitrarily generated if not. 
@@ -222,21 +220,20 @@ def variables( elif dims is not None and data is None: # no data -> generate data to match dims dims = draw(dims) - if isinstance(dims, List): + if isinstance(dims, Sequence): valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) data = draw(np_arrays(shape=draw(valid_shapes))) - else: + elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} shape = tuple(dims.values()) data = draw(np_arrays(shape=shape)) + else: + raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") elif data is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible dims = draw(dims) - # TODO is there another way to enforce these assumptions? This is very like to fail hypothesis' health checks - # TODO how do I write a test that checks that the hypothesis InvalidArgument error will be raised? - # TODO or we could just raise in this case? if isinstance(dims, List): data = draw(data) if data.ndim != len(dims): @@ -245,7 +242,7 @@ def variables( "unique dimension names. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." ) - else: + elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} data = draw(data) shape = tuple(dims.values()) @@ -255,6 +252,8 @@ def variables( f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." 
) + else: + raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") else: # nothing provided, so generate everything consistently by drawing dims to match data @@ -348,9 +347,7 @@ def dataarrays( draw: st.DrawFn, *, data: st.SearchStrategy[T_Array] = None, - dims: Union[ - st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] - ] = None, + dims: st.SearchStrategy[Union[Sequence[str], Mapping[str, int]]] = None, name: st.SearchStrategy[Union[str, None]] = names(), attrs: st.SearchStrategy[Mapping] = attrs(), ) -> st.SearchStrategy[xr.DataArray]: @@ -369,7 +366,7 @@ def dataarrays( data: Strategy generating array-likes, optional Default is to generate numpy data of arbitrary shape, values and dtypes. dims: Strategy for generating the dimensions, optional - Can either be a strategy for generating a list of string dimension names, + Can either be a strategy for generating a sequence of string dimension names, or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. If provided in the former form the lengths of the returned Variable will either be determined from the data argument if given or arbitrarily generated if not. 
@@ -398,23 +395,25 @@ def dataarrays( elif data is None and dims is not None: # no data -> generate data to match dims dims = draw(dims) - if isinstance(dims, List): + if isinstance(dims, Sequence): dim_names = dims valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) data = draw(np_arrays(shape=draw(valid_shapes))) dim_sizes = {n: l for n, l in zip(dims, data.shape)} coords = draw(coordinate_variables(dim_sizes=dim_sizes)) - else: + elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} dim_names, shape = list(dims.keys()), tuple(dims.values()) data = draw(np_arrays(shape=shape)) coords = draw(coordinate_variables(dim_sizes=dims)) + else: + raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") elif data is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible dims = draw(dims) - if isinstance(dims, List): + if isinstance(dims, Sequence): dim_names = dims data = draw(data) if data.ndim != len(dims): @@ -424,7 +423,7 @@ def dataarrays( "draw compatible examples for data and dims." ) dim_sizes = {n: l for n, l in zip(dims, data.shape)} - else: + elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} data = draw(data) dim_sizes = dims @@ -435,6 +434,8 @@ def dataarrays( f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." 
) + else: + raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") coords = draw(coordinate_variables(dim_sizes=dim_sizes)) @@ -489,9 +490,7 @@ def datasets( draw: st.DrawFn, *, data_vars: st.SearchStrategy[Mapping[str, xr.Variable]] = None, - dims: Union[ - st.SearchStrategy[List[str]], st.SearchStrategy[Mapping[str, int]] - ] = None, + dims: st.SearchStrategy[Union[Sequence[str], Mapping[str, int]]] = None, attrs: st.SearchStrategy[Mapping] = attrs(), ) -> st.SearchStrategy[xr.Dataset]: """ @@ -510,7 +509,7 @@ def datasets( Default is to generate an arbitrary combination of compatible variables with sizes matching dims, but arbitrary names, dtypes, and values. dims: Strategy for generating the dimensions, optional - Can either be a strategy for generating a list of string dimension names, + Can either be a strategy for generating a sequence of string dimension names, or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. If provided in the former form the lengths of the returned Variable will either be determined from the data argument if given or arbitrarily generated if not. @@ -537,32 +536,35 @@ def datasets( elif data_vars is None and dims is not None: # no data -> generate data to match dims - if isinstance(dims, List): + dims = draw(dims) + if isinstance(dims, Sequence): # TODO support dims as list too? 
raise NotImplementedError() - else: + elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} - dim_sizes = draw(dims) + dim_sizes = dims coords = draw(coordinate_variables(dim_sizes=dim_sizes)) coord_names = list(coords.keys()) allowed_data_var_names = names().filter(lambda n: n not in coord_names) data_vars = draw( - data_variables( - dim_sizes=dim_sizes, var_names=allowed_data_var_names - ) + data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names) ) + else: + raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") elif data_vars is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible dims = draw(dims) - if isinstance(dims, List): + if isinstance(dims, Sequence): # TODO support dims as list too? raise NotImplementedError() - else: + elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} dim_sizes = dims data_vars = draw(data_vars) _check_compatible_sizes(data_vars, dim_sizes) + else: + raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") # only draw coordinate variables whose names don't conflict with data variables allowed_coord_names = names().filter(lambda n: n not in list(data_vars.keys())) @@ -584,11 +586,7 @@ def datasets( # Allow for no data variables - helps with shrinking if draw(st.booleans()): - draw( - data_variables( - dim_sizes=dim_sizes, var_names=allowed_data_var_names - ) - ) + draw(data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names)) else: data_vars = {} diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 5ea220db719..b00e5a2c871 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -235,7 +235,7 @@ def test_restrict_names(self, data): ) ) for name in coord_vars.keys(): - if name not in ['x', 'y']: + if name not in ["x", "y"]: assert name.upper() == name From 6a30af54c2427d2fb85b8124dbdbb080bcd54825 Mon Sep 
17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 15:15:07 -0400 Subject: [PATCH 071/155] support dims as list to datasets strat when data not given --- xarray/testing/strategies.py | 36 +++++++++++++++++---------------- xarray/tests/test_strategies.py | 16 +++++++++++++++ 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index e7b4b9b176a..175436f78c7 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -342,6 +342,11 @@ def coordinate_variables( return all_coords +def _sizes_from_dim_names(dims: Sequence[str]) -> st.SearchStrategy[Mapping[str, int]]: + size_along_dim = st.integers(min_value=1, max_value=6) + return st.fixed_dictionaries({d: size_along_dim for d in dims}) + + @st.composite def dataarrays( draw: st.DrawFn, @@ -396,20 +401,17 @@ def dataarrays( # no data -> generate data to match dims dims = draw(dims) if isinstance(dims, Sequence): - dim_names = dims - valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) - data = draw(np_arrays(shape=draw(valid_shapes))) - dim_sizes = {n: l for n, l in zip(dims, data.shape)} - coords = draw(coordinate_variables(dim_sizes=dim_sizes)) - + dim_sizes = draw(_sizes_from_dim_names(dims)) elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} - dim_names, shape = list(dims.keys()), tuple(dims.values()) - data = draw(np_arrays(shape=shape)) - coords = draw(coordinate_variables(dim_sizes=dims)) + dim_sizes = dims else: raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") + dim_names, shape = list(dim_sizes.keys()), tuple(dim_sizes.values()) + data = draw(np_arrays(shape=shape)) + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + elif data is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible dims = draw(dims) @@ -538,20 +540,20 @@ def datasets( # no data -> generate data to match dims 
dims = draw(dims) if isinstance(dims, Sequence): - # TODO support dims as list too? - raise NotImplementedError() + dim_sizes = draw(_sizes_from_dim_names(dims)) elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} dim_sizes = dims - coords = draw(coordinate_variables(dim_sizes=dim_sizes)) - coord_names = list(coords.keys()) - allowed_data_var_names = names().filter(lambda n: n not in coord_names) - data_vars = draw( - data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names) - ) else: raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) + coord_names = list(coords.keys()) + allowed_data_var_names = names().filter(lambda n: n not in coord_names) + data_vars = draw( + data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names) + ) + elif data_vars is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible dims = draw(dims) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index b00e5a2c871..4c628be168d 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -310,12 +310,18 @@ def test_given_data(self, data): @given(st.data()) def test_given_dims(self, data): + dims = ["x", "y"] + ds = data.draw(datasets(dims=st.just(dims))) + assert set(ds.dims).issubset(set(dims)) + dim_sizes = {"x": 3, "y": 4} ds = data.draw(datasets(dims=st.just(dim_sizes))) assert set(ds.sizes.items()).issubset(set(dim_sizes.items())) @given(st.data()) def test_given_data_and_dims(self, data): + + # pass dims as mapping dim_sizes = {"x": 3, "y": 4} data_vars = data.draw(data_variables(dim_sizes=dim_sizes)) ds = data.draw(datasets(data_vars=st.just(data_vars), dims=st.just(dim_sizes))) @@ -329,3 +335,13 @@ def test_given_data_and_dims(self, data): data_vars=st.just(data_vars), dims=st.just(incompatible_dim_sizes) ) ) + + @pytest.mark.xfail(reason="not 
implemented") + @given(st.data()) + def test_given_data_and_dims_as_sequence(self, data): + # pass dims as sequence + dim_sizes = {"x": 3, "y": 4} + dims = list(dim_sizes.keys()) + data_vars = data.draw(data_variables(dim_sizes=dim_sizes)) + ds = data.draw(datasets(data_vars=st.just(data_vars), dims=st.just(dims))) + assert set(ds.sizes.items()).issubset(set(dim_sizes.items())) From 177d908e6a9d4bda262a7004600b5dff03ef46c3 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 15:37:03 -0400 Subject: [PATCH 072/155] put coord and data var generation in optional branch to try to improve shrinking --- xarray/testing/strategies.py | 87 ++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 43 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 175436f78c7..d9bd32e7583 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -311,33 +311,38 @@ def coordinate_variables( coord_names: Strategy generating strings, optional Allowed names for non-dimension coordinates. Defaults to `names` strategy. 
""" - dim_names = list(dim_sizes.keys()) all_coords = {} - # Possibly generate 1D "dimension coordinates" - explicit possibility not to include amy helps with shrinking - if dim_names and draw(st.booleans()): - # first generate subset of dimension names - these set which dimension coords will be included - dim_coord_names_and_lengths = draw(_unique_subset_of(dim_sizes)) + if draw( + st.booleans() + ): # Allow for no coordinate variables - explicit possibility not to helps with shrinking - # then generate 1D variables for each name - dim_coords = { - n: draw(variables(dims=st.just({n: l}))) - for n, l in dim_coord_names_and_lengths.items() - } - all_coords.update(dim_coords) + dim_names = list(dim_sizes.keys()) - # Possibly generate ND "non-dimension coordinates" - explicit possibility not to include any helps with shrinking - if draw(st.booleans()): + # Possibly generate 1D "dimension coordinates" - explicit possibility not to helps with shrinking + if dim_names and draw(st.booleans()): + # first generate subset of dimension names - these set which dimension coords will be included + dim_coord_names_and_lengths = draw(_unique_subset_of(dim_sizes)) - # can't have same name as a dimension - valid_non_dim_coord_names = coord_names.filter(lambda n: n not in dim_names) - non_dim_coords = draw( - _alignable_variables( - var_names=valid_non_dim_coord_names, dim_sizes=dim_sizes + # then generate 1D variables for each name + dim_coords = { + n: draw(variables(dims=st.just({n: l}))) + for n, l in dim_coord_names_and_lengths.items() + } + all_coords.update(dim_coords) + + # Possibly generate ND "non-dimension coordinates" - explicit possibility not to helps with shrinking + if draw(st.booleans()): + + # can't have same name as a dimension + valid_non_dim_coord_names = coord_names.filter(lambda n: n not in dim_names) + non_dim_coords = draw( + _alignable_variables( + var_names=valid_non_dim_coord_names, dim_sizes=dim_sizes + ) ) - ) - all_coords.update(non_dim_coords) + 
all_coords.update(non_dim_coords) return all_coords @@ -415,9 +420,9 @@ def dataarrays( elif data is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible dims = draw(dims) + data = draw(data) if isinstance(dims, Sequence): dim_names = dims - data = draw(data) if data.ndim != len(dims): raise InvalidArgument( f"Strategy attempting to generate data with {data.ndim} dims but {len(dims)} " @@ -427,9 +432,8 @@ def dataarrays( dim_sizes = {n: l for n, l in zip(dims, data.shape)} elif isinstance(dims, Mapping): # should be a mapping of form {dim_names: lengths} - data = draw(data) dim_sizes = dims - dim_names, shape = list(dims.keys()), tuple(dims.values()) + dim_names, shape = list(dim_sizes.keys()), tuple(dim_sizes.values()) if data.shape != shape: raise InvalidArgument( f"Strategy attempting to generate data with shape {data.shape} dims but dimension " @@ -476,14 +480,20 @@ def data_variables( var_names: Strategy generating strings Allowed names for data variables. Needed to avoid conflict with names of coordinate variables & dimensions. 
""" - dim_names = list(dim_sizes.keys()) + if draw( + st.booleans() + ): # Allow for no coordinate variables - explicit possibility not to helps with shrinking + dim_names = list(dim_sizes.keys()) + + # can't have same name as a dimension + # TODO this is also used in coordinate_variables so refactor it out into separate function + valid_var_names = var_names.filter(lambda n: n not in dim_names) + data_vars = draw( + _alignable_variables(var_names=valid_var_names, dim_sizes=dim_sizes) + ) + else: + data_vars = {} - # can't have same name as a dimension - # TODO this is also used in coordinate_variables so refactor it out into separate function - valid_var_names = var_names.filter(lambda n: n not in dim_names) - data_vars = draw( - _alignable_variables(var_names=valid_var_names, dim_sizes=dim_sizes) - ) return data_vars @@ -577,20 +587,11 @@ def datasets( else: # nothing provided, so generate everything consistently by drawing data to match dims, and coords to match both dim_sizes = draw(dimension_sizes()) - - # Allow for no coordinate variables - helps with shrinking - if draw(st.booleans()): - coords = draw(coordinate_variables(dim_sizes=dim_sizes)) - else: - coords = {} - + coords = draw(coordinate_variables(dim_sizes=dim_sizes)) allowed_data_var_names = names().filter(lambda n: n not in list(coords.keys())) - - # Allow for no data variables - helps with shrinking - if draw(st.booleans()): - draw(data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names)) - else: - data_vars = {} + data_vars = draw( + data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names) + ) return xr.Dataset(data_vars=data_vars, coords=coords, attrs=draw(attrs)) From 5424e37914cf4953448ccac577766ffe00366fcb Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 15:40:28 -0400 Subject: [PATCH 073/155] improve simple test example --- doc/user-guide/testing.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/doc/user-guide/testing.rst 
b/doc/user-guide/testing.rst index 69fc578023a..01b6ac3420a 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -76,8 +76,6 @@ range of data that the xarray strategies can generate. In your tests however you should not use ``.example()`` - instead you should parameterize your tests with the ``hypothesis.given`` decorator: -# TODO finishsimple test example - .. ipython:: python from hypothesis import given @@ -85,8 +83,8 @@ In your tests however you should not use ``.example()`` - instead you should par .. ipython:: python @given(xrst.dataarrays()) - def test_something(da): - ... + def test_function_that_acts_on_dataarrays(da): + assert func(da) == ... Chaining Strategies From c8712736f45cd4ec883af50ad2b96c516c6b559b Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 17:23:48 -0400 Subject: [PATCH 074/155] add documentation on creating duck arrays --- doc/user-guide/testing.rst | 51 +++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 01b6ac3420a..a49027bd93e 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -94,7 +94,6 @@ Xarray's strategies can accept other strategies as arguments, allowing you to cu examples. .. ipython:: python - :okexcept: # generate a DataArray with shape (3, 4), but all other details still arbitrary xrst.dataarrays( @@ -157,10 +156,9 @@ To fix the length of dimensions you can instead pass `dims` as a mapping of dime You can also use this to specify that you want examples which are missing some part of the data structure, for instance .. 
ipython:: python - :okexcept: # Generates only dataarrays with no coordinates - xrst.dataarrays(coords=st.just({})).example() + xrst.datasets(data_vars=st.just({})).example() Through a combination of chaining strategies and fixing arguments, you can specify quite complicated requirements on the objects your chained strategy will generate. @@ -189,4 +187,49 @@ arbitrary ``DataArray`` objects whose dimensions will always match these specifi Creating Duck-type Arrays ~~~~~~~~~~~~~~~~~~~~~~~~~ -# TODO creating duckarrays by passing custom strategies to data arg +Xarray objects don't have to wrap numpy arrays, in fact they can wrap any array type which presents the same API as a +numpy array (so-called "duck array wrapping", see :ref:`_internals.duck_arrays`). + +Imagine we want to write a strategy which generates arbitrary `DataArray` objects, each of which wraps a +``sparse.COO`` array instead of a ``numpy.ndarray``. How could we do that? There are two ways: + +1. Create a xarray object with numpy data and use ``.map()`` to convert the underlying array to a +different type: + +.. ipython:: python + + import sparse + +.. ipython:: python + :okexcept: + + def convert_to_sparse(arr): + if arr.ndim == 0: + return arr + else: + return sparse.COO.from_numpy(arr) + + + sparse_dataarrays = xrst.dataarrays(attrs=st.just({})).map(convert_to_sparse) + + sparse_dataarrays.example() + sparse_dataarrays.example() + +2. Pass a strategy which generates the duck-typed arrays directly to the ``data`` argument of the xarray +strategies: + +.. 
ipython:: python + :okexcept: + + @st.composite + def sparse_arrays(draw) -> st.SearchStrategy[sparse._coo.core.COO]: + """Strategy which generates random sparse.COO arrays""" + shape = draw(npst.array_shapes()) + density = draw(st.integers(min_value=0, max_value=1)) + return sparse.random(shape, density=density) + + + sparse_dataarrays = xrst.dataarrays(data=sparse_arrays(), attrs=st.just({})) + + sparse_dataarrays.example() + sparse_dataarrays.example() From 7730a27756c0d499eea1bc6a9727bec98ff29203 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 17:31:15 -0400 Subject: [PATCH 075/155] okexcept for sparse examples --- doc/user-guide/testing.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index a49027bd93e..f8d87036638 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -197,6 +197,7 @@ Imagine we want to write a strategy which generates arbitrary `DataArray` object different type: .. ipython:: python + :okexcept: import sparse @@ -209,8 +210,10 @@ different type: else: return sparse.COO.from_numpy(arr) +.. ipython:: python + :okexcept: - sparse_dataarrays = xrst.dataarrays(attrs=st.just({})).map(convert_to_sparse) + sparse_dataarrays = xrst.dataarrays().map(convert_to_sparse) sparse_dataarrays.example() sparse_dataarrays.example() @@ -228,8 +231,10 @@ strategies: density = draw(st.integers(min_value=0, max_value=1)) return sparse.random(shape, density=density) +.. 
ipython:: python + :okexcept: - sparse_dataarrays = xrst.dataarrays(data=sparse_arrays(), attrs=st.just({})) + sparse_dataarrays = xrst.dataarrays(data=sparse_arrays()) sparse_dataarrays.example() sparse_dataarrays.example() From 24549bcb80c40571a3b699b9519f53dded11e8e7 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 17:59:29 -0400 Subject: [PATCH 076/155] fix sparse dataarrays example --- doc/user-guide/testing.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index f8d87036638..88d8f413eb4 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -204,11 +204,12 @@ different type: .. ipython:: python :okexcept: - def convert_to_sparse(arr): - if arr.ndim == 0: - return arr + def convert_to_sparse(da): + if da.ndim == 0: + return da else: - return sparse.COO.from_numpy(arr) + da.data = sparse.COO.from_numpy(da.values) + return da .. ipython:: python :okexcept: @@ -238,3 +239,6 @@ strategies: sparse_dataarrays.example() sparse_dataarrays.example() + +Either approach is fine, but one may be more convenient than the other depending on the type of the duck array which you +want to wrap. From 3082a0978ec16183b80865b4f78085ce91d9b54b Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 18:00:39 -0400 Subject: [PATCH 077/155] todo about building a duck array dataset --- doc/user-guide/testing.rst | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 88d8f413eb4..e18eb54aa88 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -242,3 +242,30 @@ strategies: Either approach is fine, but one may be more convenient than the other depending on the type of the duck array which you want to wrap. + +Creating datasets can be a little more involved. Using method (1) is simple: + +.. 
ipython:: python + :okexcept: + + def convert_ds_to_sparse(ds): + return ds.map(convert_to_sparse) + +.. ipython:: python + :okexcept: + + sparse_datasets = xrst.datasets().map(convert_ds_to_sparse) + + sparse_datasets.example() + +but building a dataset from scratch (i.e. method (2)) requires building the dataset object in such as way that all of +the data variables have compatible dimensions. You can build up a dictionary of the form ``{var_name: data_variable}`` +yourself, or you can use the ``data_vars`` argument to the ``data_variables`` strategy (TODO): + +.. ipython:: python + :okexcept: + + sparse_data_vars = xrst.data_variables(data=sparse_arrays()) + sparse_datasets = xrst.datasets(data_vars=sparse_data_vars) + + sparse_datasets.example() \ No newline at end of file From 5df60dc9ae548f157c6b2a8d60c4380c4e9fe541 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Wed, 7 Sep 2022 18:19:08 -0400 Subject: [PATCH 078/155] fix imports and cross-links --- doc/user-guide/testing.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index e18eb54aa88..0caae3f6415 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -20,8 +20,8 @@ Hypothesis testing .. note:: Testing with hypothesis is a fairly advanced topic. Before reading this section it is recommended that you take a look - at our guide to xarray's data structures, are familiar with conventional unit testing in pytest, and have seen the - hypothesis library documentation. + at our guide to xarray's :ref:`data structures`, are familiar with conventional unit testing in pytest, and have seen + the hypothesis library documentation. ``Hypothesis`` is a powerful library for property-based testing. 
Instead of writing tests for one example at a time, it allows you to write tests parameterized by a source of many @@ -188,7 +188,7 @@ Creating Duck-type Arrays ~~~~~~~~~~~~~~~~~~~~~~~~~ Xarray objects don't have to wrap numpy arrays, in fact they can wrap any array type which presents the same API as a -numpy array (so-called "duck array wrapping", see :ref:`_internals.duck_arrays`). +numpy array (so-called "duck array wrapping", see :ref:`internals.duck_arrays`). Imagine we want to write a strategy which generates arbitrary `DataArray` objects, each of which wraps a ``sparse.COO`` array instead of a ``numpy.ndarray``. How could we do that? There are two ways: @@ -200,6 +200,7 @@ different type: :okexcept: import sparse + import hypothesis.extra.numpy as npst .. ipython:: python :okexcept: From 01078def7aed2418a73b9e94a90ed75d9979e852 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 7 Sep 2022 22:20:49 +0000 Subject: [PATCH 079/155] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/user-guide/testing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 0caae3f6415..77a9ebafc22 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -269,4 +269,4 @@ yourself, or you can use the ``data_vars`` argument to the ``data_variables`` st sparse_data_vars = xrst.data_variables(data=sparse_arrays()) sparse_datasets = xrst.datasets(data_vars=sparse_data_vars) - sparse_datasets.example() \ No newline at end of file + sparse_datasets.example() From 53290e216d5816ba5b8212f8186f69b93b36ce52 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 8 Sep 2022 13:29:28 -0400 Subject: [PATCH 080/155] add hypothesis library to intersphinx mapping --- doc/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/conf.py b/doc/conf.py index 
f0050aeb24a..de79dd69d6c 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -334,6 +334,7 @@ "cftime": ("https://unidata.github.io/cftime", None), "rasterio": ("https://rasterio.readthedocs.io/en/latest", None), "sparse": ("https://sparse.pydata.org/en/latest/", None), + "hypothesis": ("https://hypothesis.readthedocs.io/en/latest/", None), } From bd2cb6e573b0136193749a6c3b4913e8632992d5 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 8 Sep 2022 13:29:38 -0400 Subject: [PATCH 081/155] fix many links --- doc/user-guide/testing.rst | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 77a9ebafc22..6802be434c9 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -20,13 +20,14 @@ Hypothesis testing .. note:: Testing with hypothesis is a fairly advanced topic. Before reading this section it is recommended that you take a look - at our guide to xarray's :ref:`data structures`, are familiar with conventional unit testing in pytest, and have seen - the hypothesis library documentation. + at our guide to xarray's :ref:`data structures`, are familiar with conventional unit testing in + `pytest `_, and have seen the + `hypothesis library documentation `_. -``Hypothesis`` is a powerful library for property-based testing. +`The hypothesis library `_ is a powerful tool for property-based testing. Instead of writing tests for one example at a time, it allows you to write tests parameterized by a source of many dynamically generated examples. For example you might have written a test which you wish to be parameterized by the set -of all possible ``integers()``. +of all possible integers via :py:func:`hypothesis.strategies.integers()`. Property-based testing is extremely powerful, because (unlike more conventional example-based testing) it can find bugs that you did not even think to look for! 
@@ -38,7 +39,7 @@ Each source of examples is called a "strategy", and xarray provides a range of c data structures containing arbitrary data. You can use these to efficiently test downstream code, quickly ensuring that your code can handle xarray objects of all possible structures and contents. -These strategies are accessible in the :py:module::`xarray.testing.strategies` module, which provides +These strategies are accessible in the :py:mod:`xarray.testing.strategies` module, which provides .. currentmodule:: xarray @@ -74,7 +75,7 @@ You can see that calling ``.example()`` multiple times will generate different e range of data that the xarray strategies can generate. In your tests however you should not use ``.example()`` - instead you should parameterize your tests with the -``hypothesis.given`` decorator: +:py:func:`hypothesis.given` decorator: .. ipython:: python @@ -113,7 +114,9 @@ For example you could create a ``chunks`` strategy to specify particular chunkin .. code-block:: - @given(st.data()) + import hypothesis.extra.numpy as npst + + @st.given(st.data()) def test_something_else_inefficiently(data): arrs = npst.arrays(dtype=numeric_dtypes) # generates arrays of any shape dims = xrst.dimension_names() # generates lists of any number of dimensions @@ -133,7 +136,7 @@ Fixing Arguments ~~~~~~~~~~~~~~~~ If you want to fix one aspect of the data structure, whilst allowing variation in the generated examples -over all other aspects, then use ``hypothesis.strategies.just()``. +over all other aspects, then use :py:func:`hypothesis.strategies.just()`. .. ipython:: python @@ -142,8 +145,8 @@ over all other aspects, then use ``hypothesis.strategies.just()``. # Generates only dataarrays with dimensions ["x", "y"] xrst.dataarrays(dims=st.just(["x", "y"])).example() -(This is technically another example of chaining strategies - ``hypothesis.strategies.just`` is simply a special -strategy that just contains a single example.) 
+(This is technically another example of chaining strategies - :py:func:`hypothesis.strategies.just()` is simply a +special strategy that just contains a single example.) To fix the length of dimensions you can instead pass `dims` as a mapping of dimension names to lengths (i.e. following xarray objects' ``.sizes()`` property), e.g. @@ -176,8 +179,8 @@ objects your chained strategy will generate. special_dataarrays.example() special_dataarrays.example() -Here we have used one of hypothesis' built-in strategies ``fixed_dictionaries`` to create a strategy which generates -mappings of dimension names to lengths (i.e. the ``size`` of the xarray object we want). +Here we have used one of hypothesis' built-in strategies :py:func:`hypothesis.strategies.fixed_dictionaries` to create a +strategy which generates mappings of dimension names to lengths (i.e. the ``size`` of the xarray object we want). This particular strategy will always generate an ``x`` dimension of length 2, and a ``y`` dimension of length either 3 or 4, and will sometimes also generate a ``z`` dimension of length 2. By feeding this strategy for dictionaries into the `dims` argument of xarray's `dataarrays` strategy, we can generate @@ -191,7 +194,7 @@ Xarray objects don't have to wrap numpy arrays, in fact they can wrap any array numpy array (so-called "duck array wrapping", see :ref:`internals.duck_arrays`). Imagine we want to write a strategy which generates arbitrary `DataArray` objects, each of which wraps a -``sparse.COO`` array instead of a ``numpy.ndarray``. How could we do that? There are two ways: +:py:class:`sparse.COO` array instead of a ``numpy.ndarray``. How could we do that? There are two ways: 1. 
Create a xarray object with numpy data and use ``.map()`` to convert the underlying array to a different type: From c5e83c268ff0fc14b8d90084baf3514d6d4818c6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 8 Sep 2022 17:31:40 +0000 Subject: [PATCH 082/155] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/user-guide/testing.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 6802be434c9..99fec410f28 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -116,6 +116,7 @@ For example you could create a ``chunks`` strategy to specify particular chunkin import hypothesis.extra.numpy as npst + @st.given(st.data()) def test_something_else_inefficiently(data): arrs = npst.arrays(dtype=numeric_dtypes) # generates arrays of any shape From de26b2fc5f9f551ae1fe793e8d183393f944ce72 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 9 Sep 2022 10:34:07 -0400 Subject: [PATCH 083/155] fixed all local mypy errors --- xarray/testing/strategies.py | 204 ++++++++++++++++++----------------- 1 file changed, 105 insertions(+), 99 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index d9bd32e7583..189002ac035 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,4 @@ -from typing import Any, Hashable, List, Mapping, Sequence, Tuple, Union +from typing import Any, Dict, Hashable, List, Mapping, Sequence, Tuple, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -39,9 +39,9 @@ def numeric_dtypes() -> st.SearchStrategy[np.dtype]: def np_arrays( *, - shape: Union[Tuple[int], st.SearchStrategy[Tuple[int]]] = npst.array_shapes( - max_side=4 - ), + shape: Union[ + Tuple[int, ...], st.SearchStrategy[Tuple[int, ...]] + ] = npst.array_shapes(max_side=4), dtype: Union[np.dtype, 
st.SearchStrategy[np.dtype]] = numeric_dtypes(), ) -> st.SearchStrategy[np.ndarray]: """ @@ -72,7 +72,7 @@ def dimension_names( *, min_dims: int = 0, max_dims: int = 3, -) -> st.SearchStrategy[List[str]]: +) -> st.SearchStrategy[List[Hashable]]: """ Generates an arbitrary list of valid dimension names. @@ -96,12 +96,12 @@ def dimension_names( def dimension_sizes( *, - dim_names: st.SearchStrategy[str] = names(), + dim_names: st.SearchStrategy[Hashable] = names(), min_dims: int = 0, max_dims: int = 3, min_side: int = 1, max_side: int = None, -) -> st.SearchStrategy[Mapping[str, int]]: +) -> st.SearchStrategy[Mapping[Hashable, int]]: """ Generates an arbitrary mapping from dimension names to lengths. @@ -147,7 +147,7 @@ def dimension_sizes( _attr_values = st.none() | st.booleans() | st.text(st.characters()) | _small_arrays -def attrs() -> st.SearchStrategy[Mapping[str, Any]]: +def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: """ Generates arbitrary valid attributes dictionaries for xarray objects. @@ -172,9 +172,9 @@ def variables( draw: st.DrawFn, *, data: st.SearchStrategy[T_Array] = None, - dims: st.SearchStrategy[Union[Sequence[str], Mapping[str, int]]] = None, + dims: st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]] = None, attrs: st.SearchStrategy[Mapping] = attrs(), -) -> st.SearchStrategy[xr.Variable]: +) -> xr.Variable: """ Generates arbitrary xarray.Variable objects. 
@@ -214,41 +214,42 @@ def variables( if data is not None and dims is None: # no dims -> generate dims to match data - data = draw(data) - dims = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) + _data = draw(data) + dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) elif dims is not None and data is None: # no data -> generate data to match dims - dims = draw(dims) - if isinstance(dims, Sequence): - valid_shapes = npst.array_shapes(min_dims=len(dims), max_dims=len(dims)) - data = draw(np_arrays(shape=draw(valid_shapes))) - elif isinstance(dims, Mapping): + _dims = draw(dims) + if isinstance(_dims, Sequence): + dim_names = list(_dims) + valid_shapes = npst.array_shapes(min_dims=len(_dims), max_dims=len(_dims)) + _data = draw(np_arrays(shape=draw(valid_shapes))) + elif isinstance(_dims, Mapping): # should be a mapping of form {dim_names: lengths} - shape = tuple(dims.values()) - data = draw(np_arrays(shape=shape)) + dim_names, shape = list(_dims.keys()), tuple(_dims.values()) + _data = draw(np_arrays(shape=shape)) else: raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") elif data is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible - dims = draw(dims) + _dims = draw(dims) + _data = draw(data) - if isinstance(dims, List): - data = draw(data) - if data.ndim != len(dims): + if isinstance(_dims, Sequence): + dim_names = list(_dims) + if _data.ndim != len(_dims): raise InvalidArgument( - f"Strategy attempting to generate data with {data.ndim} dims but {len(dims)} " + f"Strategy attempting to generate data with {_data.ndim} dims but {len(_dims)} " "unique dimension names. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." 
) - elif isinstance(dims, Mapping): + elif isinstance(_dims, Mapping): # should be a mapping of form {dim_names: lengths} - data = draw(data) - shape = tuple(dims.values()) - if data.shape != shape: + dim_names, shape = list(_dims.keys()), tuple(_dims.values()) + if _data.shape != shape: raise InvalidArgument( - f"Strategy attempting to generate data with shape {data.shape} dims but dimension " + f"Strategy attempting to generate data with shape {_data.shape} dims but dimension " f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." ) @@ -257,16 +258,16 @@ def variables( else: # nothing provided, so generate everything consistently by drawing dims to match data - data = draw(np_arrays()) - dims = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) + _data = draw(np_arrays()) + dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) - return xr.Variable(dims=dims, data=data, attrs=draw(attrs)) + return xr.Variable(dims=dim_names, data=_data, attrs=draw(attrs)) @st.composite def _unique_subset_of( draw: st.DrawFn, d: Mapping[Hashable, Any] -) -> st.SearchStrategy[Mapping[Hashable, Any]]: +) -> Mapping[Hashable, Any]: subset_keys = draw(st.lists(st.sampled_from(list(d.keys())), unique=True)) return {k: d[k] for k in subset_keys} @@ -276,8 +277,8 @@ def _alignable_variables( draw: st.DrawFn, *, var_names: st.SearchStrategy[str], - dim_sizes: Mapping[str, int], -) -> st.SearchStrategy[Mapping[str, xr.Variable]]: + dim_sizes: Mapping[Hashable, int], +) -> Mapping[Hashable, xr.Variable]: """ Generates dicts of names mapping to variables with compatible (i.e. alignable) dimensions and sizes. 
""" @@ -293,9 +294,9 @@ def _alignable_variables( def coordinate_variables( draw: st.DrawFn, *, - dim_sizes: Mapping[str, int], - coord_names: st.SearchStrategy[str] = names(), -) -> st.SearchStrategy[Mapping[str, xr.Variable]]: + dim_sizes: Mapping[Hashable, int], + coord_names: st.SearchStrategy[Hashable] = names(), +) -> Mapping[Hashable, xr.Variable]: """ Generates dicts of alignable Variable objects for use as coordinates. @@ -321,7 +322,7 @@ def coordinate_variables( dim_names = list(dim_sizes.keys()) # Possibly generate 1D "dimension coordinates" - explicit possibility not to helps with shrinking - if dim_names and draw(st.booleans()): + if len(dim_names) > 0 and draw(st.booleans()): # first generate subset of dimension names - these set which dimension coords will be included dim_coord_names_and_lengths = draw(_unique_subset_of(dim_sizes)) @@ -347,7 +348,9 @@ def coordinate_variables( return all_coords -def _sizes_from_dim_names(dims: Sequence[str]) -> st.SearchStrategy[Mapping[str, int]]: +def _sizes_from_dim_names( + dims: Sequence[Hashable], +) -> st.SearchStrategy[Dict[Hashable, int]]: size_along_dim = st.integers(min_value=1, max_value=6) return st.fixed_dictionaries({d: size_along_dim for d in dims}) @@ -357,10 +360,10 @@ def dataarrays( draw: st.DrawFn, *, data: st.SearchStrategy[T_Array] = None, - dims: st.SearchStrategy[Union[Sequence[str], Mapping[str, int]]] = None, - name: st.SearchStrategy[Union[str, None]] = names(), + dims: st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]] = None, + name: st.SearchStrategy[Union[Hashable, None]] = names(), attrs: st.SearchStrategy[Mapping] = attrs(), -) -> st.SearchStrategy[xr.DataArray]: +) -> xr.DataArray: """ Generates arbitrary xarray.DataArray objects. @@ -391,71 +394,73 @@ def dataarrays( If custom strategies passed try to draw examples which together cannot create a valid DataArray. """ - name = draw(st.none() | name) + _name = draw(st.none() | name) # TODO add a coords argument? 
if data is not None and dims is None: # no dims -> generate dims to match data - data = draw(data) - dim_names = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) - dim_sizes = {n: l for n, l in zip(dim_names, data.shape)} + _data = draw(data) + dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) + dim_sizes: Mapping[Hashable, int] = { + n: l for n, l in zip(dim_names, _data.shape) + } coords = draw(coordinate_variables(dim_sizes=dim_sizes)) elif data is None and dims is not None: # no data -> generate data to match dims - dims = draw(dims) - if isinstance(dims, Sequence): - dim_sizes = draw(_sizes_from_dim_names(dims)) - elif isinstance(dims, Mapping): + _dims = draw(dims) + if isinstance(_dims, Sequence): + dim_sizes = draw(_sizes_from_dim_names(_dims)) + elif isinstance(_dims, Mapping): # should be a mapping of form {dim_names: lengths} - dim_sizes = dims + dim_sizes = _dims else: - raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") + raise ValueError(f"Invalid type for dims argument - got type {type(_dims)}") dim_names, shape = list(dim_sizes.keys()), tuple(dim_sizes.values()) - data = draw(np_arrays(shape=shape)) + _data = draw(np_arrays(shape=shape)) coords = draw(coordinate_variables(dim_sizes=dim_sizes)) elif data is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible - dims = draw(dims) - data = draw(data) - if isinstance(dims, Sequence): - dim_names = dims - if data.ndim != len(dims): + _dims = draw(dims) + _data = draw(data) + if isinstance(_dims, Sequence): + dim_names = list(_dims) + if _data.ndim != len(_dims): raise InvalidArgument( - f"Strategy attempting to generate data with {data.ndim} dims but {len(dims)} " + f"Strategy attempting to generate data with {_data.ndim} dims but {len(_dims)} " "unique dimension names. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." 
) - dim_sizes = {n: l for n, l in zip(dims, data.shape)} - elif isinstance(dims, Mapping): + dim_sizes = {n: l for n, l in zip(_dims, _data.shape)} + elif isinstance(_dims, Mapping): # should be a mapping of form {dim_names: lengths} - dim_sizes = dims + dim_sizes = _dims dim_names, shape = list(dim_sizes.keys()), tuple(dim_sizes.values()) - if data.shape != shape: + if _data.shape != shape: raise InvalidArgument( - f"Strategy attempting to generate data with shape {data.shape} dims but dimension " + f"Strategy attempting to generate data with shape {_data.shape} dims but dimension " f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " "draw compatible examples for data and dims." ) else: - raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") + raise ValueError(f"Invalid type for dims argument - got type {type(_dims)}") coords = draw(coordinate_variables(dim_sizes=dim_sizes)) else: # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both - data = draw(np_arrays()) - dim_names = draw(dimension_names(min_dims=data.ndim, max_dims=data.ndim)) - dim_sizes = {n: l for n, l in zip(dim_names, data.shape)} + _data = draw(np_arrays()) + dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) + dim_sizes = {n: l for n, l in zip(dim_names, _data.shape)} coords = draw(coordinate_variables(dim_sizes=dim_sizes)) return xr.DataArray( - data=data, + data=_data, coords=coords, - name=name, + name=_name, dims=dim_names, attrs=draw(attrs), ) @@ -465,9 +470,9 @@ def dataarrays( def data_variables( draw: st.DrawFn, *, - dim_sizes: Mapping[str, int], - var_names: st.SearchStrategy[str] = names(), -) -> st.SearchStrategy[Mapping[str, xr.Variable]]: + dim_sizes: Mapping[Hashable, int], + var_names: st.SearchStrategy[Hashable] = names(), +) -> Mapping[Hashable, xr.Variable]: """ Generates dicts of alignable Variable objects for use as Dataset data variables. 
@@ -501,10 +506,10 @@ def data_variables( def datasets( draw: st.DrawFn, *, - data_vars: st.SearchStrategy[Mapping[str, xr.Variable]] = None, - dims: st.SearchStrategy[Union[Sequence[str], Mapping[str, int]]] = None, + data_vars: st.SearchStrategy[Mapping[Hashable, xr.Variable]] = None, + dims: st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]] = None, attrs: st.SearchStrategy[Mapping] = attrs(), -) -> st.SearchStrategy[xr.Dataset]: +) -> xr.Dataset: """ Generates arbitrary xarray.Dataset objects. @@ -538,48 +543,48 @@ def datasets( if data_vars is not None and dims is None: # no dims -> generate dims to match data - data_vars = draw(data_vars) - dim_sizes = _find_overall_sizes(data_vars) + _data_vars = draw(data_vars) + dim_sizes = _find_overall_sizes(_data_vars) # only draw coordinate variables whose names don't conflict with data variables - allowed_coord_names = names().filter(lambda n: n not in list(data_vars.keys())) + allowed_coord_names = names().filter(lambda n: n not in list(_data_vars.keys())) coords = draw( coordinate_variables(coord_names=allowed_coord_names, dim_sizes=dim_sizes) ) elif data_vars is None and dims is not None: # no data -> generate data to match dims - dims = draw(dims) - if isinstance(dims, Sequence): - dim_sizes = draw(_sizes_from_dim_names(dims)) - elif isinstance(dims, Mapping): + _dims = draw(dims) + if isinstance(_dims, Sequence): + dim_sizes = draw(_sizes_from_dim_names(_dims)) + elif isinstance(_dims, Mapping): # should be a mapping of form {dim_names: lengths} - dim_sizes = dims + dim_sizes = _dims else: - raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") + raise ValueError(f"Invalid type for dims argument - got type {type(_dims)}") coords = draw(coordinate_variables(dim_sizes=dim_sizes)) coord_names = list(coords.keys()) allowed_data_var_names = names().filter(lambda n: n not in coord_names) - data_vars = draw( + _data_vars = draw( data_variables(dim_sizes=dim_sizes, 
var_names=allowed_data_var_names) ) elif data_vars is not None and dims is not None: # both data and dims provided -> check drawn examples are compatible - dims = draw(dims) - if isinstance(dims, Sequence): + _dims = draw(dims) + if isinstance(_dims, Sequence): # TODO support dims as list too? raise NotImplementedError() - elif isinstance(dims, Mapping): + elif isinstance(_dims, Mapping): # should be a mapping of form {dim_names: lengths} - dim_sizes = dims - data_vars = draw(data_vars) - _check_compatible_sizes(data_vars, dim_sizes) + dim_sizes = _dims + _data_vars = draw(data_vars) + _check_compatible_sizes(_data_vars, dim_sizes) else: - raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") + raise ValueError(f"Invalid type for dims argument - got type {type(_dims)}") # only draw coordinate variables whose names don't conflict with data variables - allowed_coord_names = names().filter(lambda n: n not in list(data_vars.keys())) + allowed_coord_names = names().filter(lambda n: n not in list(_data_vars.keys())) coords = draw( coordinate_variables(coord_names=allowed_coord_names, dim_sizes=dim_sizes) ) @@ -589,23 +594,24 @@ def datasets( dim_sizes = draw(dimension_sizes()) coords = draw(coordinate_variables(dim_sizes=dim_sizes)) allowed_data_var_names = names().filter(lambda n: n not in list(coords.keys())) - data_vars = draw( + _data_vars = draw( data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names) ) - return xr.Dataset(data_vars=data_vars, coords=coords, attrs=draw(attrs)) + return xr.Dataset(data_vars=_data_vars, coords=coords, attrs=draw(attrs)) -def _find_overall_sizes(vars: Mapping[str, xr.Variable]) -> Mapping[str, int]: +def _find_overall_sizes(vars: Mapping[Hashable, xr.Variable]) -> Mapping[Hashable, int]: """Given a set of variables, find their common sizes.""" # TODO raise an error if inconsistent (i.e. 
if different values appear under same key) + # TODO narrow type by checking if values are not ints sizes_dicts = [v.sizes for v in vars.values()] dim_sizes = {d: s for dim_sizes in sizes_dicts for d, s in dim_sizes.items()} return dim_sizes def _check_compatible_sizes( - vars: Mapping[str, xr.Variable], dim_sizes: Mapping[str, int] + vars: Mapping[Hashable, xr.Variable], dim_sizes: Mapping[Hashable, int] ): """Check set of variables have sizes compatible with given dim_sizes. If not raise InvalidArgument error.""" From f81e14f0ec673e2c23f77b9d0a9150fecd404828 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 9 Sep 2022 16:19:29 -0400 Subject: [PATCH 084/155] move numpy strategies import --- doc/user-guide/testing.rst | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 6802be434c9..95fc2fceb99 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -57,6 +57,12 @@ These strategies are accessible in the :py:mod:`xarray.testing.strategies` modul testing.strategies.data_variables testing.strategies.datasets +These build upon the numpy strategies offered in :py:mod:`hypothesis.extra.numpy`: + +.. ipython:: python + + import hypothesis.extra.numpy as npst + Generating Examples ~~~~~~~~~~~~~~~~~~~ @@ -114,8 +120,6 @@ For example you could create a ``chunks`` strategy to specify particular chunkin .. code-block:: - import hypothesis.extra.numpy as npst - @st.given(st.data()) def test_something_else_inefficiently(data): arrs = npst.arrays(dtype=numeric_dtypes) # generates arrays of any shape @@ -203,7 +207,6 @@ different type: :okexcept: import sparse - import hypothesis.extra.numpy as npst .. 
ipython:: python :okexcept: From af24af558a9460a3aac70d8f8f2a3cd02d5bd5cc Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 9 Sep 2022 23:51:56 -0400 Subject: [PATCH 085/155] reduce sizes --- xarray/testing/strategies.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 189002ac035..9d0549751c8 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -65,7 +65,7 @@ def names() -> st.SearchStrategy[str]: Requires the hypothesis package to be installed. """ - return st.text(st.characters(), min_size=1) + return st.text(st.characters(), min_size=1, max_size=5) def dimension_names( @@ -127,7 +127,7 @@ def dimension_sizes( """ if max_side is None: - max_side = min_side + 5 + max_side = min_side + 3 return st.dictionaries( keys=dim_names, @@ -141,10 +141,10 @@ def dimension_sizes( _small_arrays = np_arrays( shape=npst.array_shapes( max_side=2, - max_dims=3, + max_dims=2, ) ) -_attr_values = st.none() | st.booleans() | st.text(st.characters()) | _small_arrays +_attr_values = st.none() | st.booleans() | st.text(st.characters(), max_size=5) | _small_arrays def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: From 9777c2afe00cd28b768610c6b9c3f9324091e080 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 10 Sep 2022 03:53:21 +0000 Subject: [PATCH 086/155] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/testing/strategies.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 9d0549751c8..ff1ef9196b6 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -144,7 +144,9 @@ def dimension_sizes( max_dims=2, ) ) -_attr_values = st.none() | st.booleans() | st.text(st.characters(), max_size=5) | _small_arrays +_attr_values = 
( + st.none() | st.booleans() | st.text(st.characters(), max_size=5) | _small_arrays +) def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: From 7841dd5c87ba493651bb8e995d2a494e536e9487 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Mon, 24 Jul 2023 10:12:54 -0400 Subject: [PATCH 087/155] fix some api links in docs --- doc/user-guide/testing.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 95fc2fceb99..2905ef375ef 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -152,7 +152,7 @@ over all other aspects, then use :py:func:`hypothesis.strategies.just()`. (This is technically another example of chaining strategies - :py:func:`hypothesis.strategies.just()` is simply a special strategy that just contains a single example.) -To fix the length of dimensions you can instead pass `dims` as a mapping of dimension names to lengths +To fix the length of dimensions you can instead pass ``dims`` as a mapping of dimension names to lengths (i.e. following xarray objects' ``.sizes()`` property), e.g. .. ipython:: python @@ -187,17 +187,17 @@ Here we have used one of hypothesis' built-in strategies :py:func:`hypothesis.st strategy which generates mappings of dimension names to lengths (i.e. the ``size`` of the xarray object we want). This particular strategy will always generate an ``x`` dimension of length 2, and a ``y`` dimension of length either 3 or 4, and will sometimes also generate a ``z`` dimension of length 2. -By feeding this strategy for dictionaries into the `dims` argument of xarray's `dataarrays` strategy, we can generate -arbitrary ``DataArray`` objects whose dimensions will always match these specifications. 
+By feeding this strategy for dictionaries into the ``dims`` argument of xarray's :py:func:`~st.dataarrays` strategy, +we can generate arbitrary :py:class:`~xarray.DataArray` objects whose dimensions will always match these specifications. Creating Duck-type Arrays ~~~~~~~~~~~~~~~~~~~~~~~~~ Xarray objects don't have to wrap numpy arrays, in fact they can wrap any array type which presents the same API as a -numpy array (so-called "duck array wrapping", see :ref:`internals.duck_arrays`). +numpy array (so-called "duck array wrapping", see :ref:`wrapping numpy-like arrays `). -Imagine we want to write a strategy which generates arbitrary `DataArray` objects, each of which wraps a +Imagine we want to write a strategy which generates arbitrary ``DataArray`` objects, each of which wraps a :py:class:`sparse.COO` array instead of a ``numpy.ndarray``. How could we do that? There are two ways: 1. Create a xarray object with numpy data and use ``.map()`` to convert the underlying array to a From fd6aa06d25c71beea9fada7c6f854ee2f7514439 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 07:51:14 -0400 Subject: [PATCH 088/155] remove every strategy beyond variables --- doc/api.rst | 4 - doc/user-guide/testing.rst | 4 - xarray/testing/strategies.py | 372 +------------------------------- xarray/tests/test_strategies.py | 187 +--------------- 4 files changed, 5 insertions(+), 562 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index a930c9e5466..ca783fb0409 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1087,10 +1087,6 @@ Hypothesis Testing Strategies testing.strategies.dimension_sizes testing.strategies.attrs testing.strategies.variables - testing.strategies.coordinate_variables - testing.strategies.dataarrays - testing.strategies.data_variables - testing.strategies.datasets Exceptions ========== diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 2905ef375ef..6cb409788e9 100644 --- a/doc/user-guide/testing.rst +++ 
b/doc/user-guide/testing.rst @@ -52,10 +52,6 @@ These strategies are accessible in the :py:mod:`xarray.testing.strategies` modul testing.strategies.dimension_sizes testing.strategies.attrs testing.strategies.variables - testing.strategies.coordinate_variables - testing.strategies.dataarrays - testing.strategies.data_variables - testing.strategies.datasets These build upon the numpy strategies offered in :py:mod:`hypothesis.extra.numpy`: diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index ff1ef9196b6..4a2d03cf467 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,5 @@ -from typing import Any, Dict, Hashable, List, Mapping, Sequence, Tuple, Union +from collections.abc import Hashable, Mapping, Sequence +from typing import Any, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -14,10 +15,6 @@ "dimension_sizes", "attrs", "variables", - "coordinate_variables", - "dataarrays", - "data_variables", - "datasets", ] @@ -40,7 +37,7 @@ def numeric_dtypes() -> st.SearchStrategy[np.dtype]: def np_arrays( *, shape: Union[ - Tuple[int, ...], st.SearchStrategy[Tuple[int, ...]] + tuple[int, ...], st.SearchStrategy[tuple[int, ...]] ] = npst.array_shapes(max_side=4), dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = numeric_dtypes(), ) -> st.SearchStrategy[np.ndarray]: @@ -72,7 +69,7 @@ def dimension_names( *, min_dims: int = 0, max_dims: int = 3, -) -> st.SearchStrategy[List[Hashable]]: +) -> st.SearchStrategy[list[Hashable]]: """ Generates an arbitrary list of valid dimension names. 
@@ -264,364 +261,3 @@ def variables( dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) return xr.Variable(dims=dim_names, data=_data, attrs=draw(attrs)) - - -@st.composite -def _unique_subset_of( - draw: st.DrawFn, d: Mapping[Hashable, Any] -) -> Mapping[Hashable, Any]: - subset_keys = draw(st.lists(st.sampled_from(list(d.keys())), unique=True)) - return {k: d[k] for k in subset_keys} - - -@st.composite -def _alignable_variables( - draw: st.DrawFn, - *, - var_names: st.SearchStrategy[str], - dim_sizes: Mapping[Hashable, int], -) -> Mapping[Hashable, xr.Variable]: - """ - Generates dicts of names mapping to variables with compatible (i.e. alignable) dimensions and sizes. - """ - - alignable_dim_sizes = draw(_unique_subset_of(dim_sizes)) if dim_sizes else {} - - vars = variables(dims=st.just(alignable_dim_sizes)) - # TODO don't hard code max number of variables - return draw(st.dictionaries(var_names, vars, max_size=3)) - - -@st.composite -def coordinate_variables( - draw: st.DrawFn, - *, - dim_sizes: Mapping[Hashable, int], - coord_names: st.SearchStrategy[Hashable] = names(), -) -> Mapping[Hashable, xr.Variable]: - """ - Generates dicts of alignable Variable objects for use as coordinates. - - Differs from data_variables strategy in that it deliberately creates dimension coordinates - (i.e. 1D variables with the same name as a dimension) as well as non-dimension coordinates. - - Requires the hypothesis package to be installed. - - Parameters - ---------- - dim_sizes: Mapping of str to int - Sizes of dimensions to use for coordinates. - coord_names: Strategy generating strings, optional - Allowed names for non-dimension coordinates. Defaults to `names` strategy. 
- """ - - all_coords = {} - - if draw( - st.booleans() - ): # Allow for no coordinate variables - explicit possibility not to helps with shrinking - - dim_names = list(dim_sizes.keys()) - - # Possibly generate 1D "dimension coordinates" - explicit possibility not to helps with shrinking - if len(dim_names) > 0 and draw(st.booleans()): - # first generate subset of dimension names - these set which dimension coords will be included - dim_coord_names_and_lengths = draw(_unique_subset_of(dim_sizes)) - - # then generate 1D variables for each name - dim_coords = { - n: draw(variables(dims=st.just({n: l}))) - for n, l in dim_coord_names_and_lengths.items() - } - all_coords.update(dim_coords) - - # Possibly generate ND "non-dimension coordinates" - explicit possibility not to helps with shrinking - if draw(st.booleans()): - - # can't have same name as a dimension - valid_non_dim_coord_names = coord_names.filter(lambda n: n not in dim_names) - non_dim_coords = draw( - _alignable_variables( - var_names=valid_non_dim_coord_names, dim_sizes=dim_sizes - ) - ) - all_coords.update(non_dim_coords) - - return all_coords - - -def _sizes_from_dim_names( - dims: Sequence[Hashable], -) -> st.SearchStrategy[Dict[Hashable, int]]: - size_along_dim = st.integers(min_value=1, max_value=6) - return st.fixed_dictionaries({d: size_along_dim for d in dims}) - - -@st.composite -def dataarrays( - draw: st.DrawFn, - *, - data: st.SearchStrategy[T_Array] = None, - dims: st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]] = None, - name: st.SearchStrategy[Union[Hashable, None]] = names(), - attrs: st.SearchStrategy[Mapping] = attrs(), -) -> xr.DataArray: - """ - Generates arbitrary xarray.DataArray objects. - - Follows the basic signature of the xarray.DataArray constructor, but you can also pass alternative strategies to - generate either numpy-like array data, dimensions, or coordinates. - - Passing nothing will generate a completely arbitrary DataArray (backed by a numpy array). 
- - Requires the hypothesis package to be installed. - - Parameters - ---------- - data: Strategy generating array-likes, optional - Default is to generate numpy data of arbitrary shape, values and dtypes. - dims: Strategy for generating the dimensions, optional - Can either be a strategy for generating a sequence of string dimension names, - or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. - If provided in the former form the lengths of the returned Variable will either be determined from the - data argument if given or arbitrarily generated if not. - Default is to generate arbitrary dimension sizes, or arbitrary dimension names for each axis in data. - name: Strategy for generating a string name, optional - Default is to use the `names` strategy, or to create an unnamed DataArray. - attrs: Strategy which generates dicts, optional - - Raises - ------ - hypothesis.errors.InvalidArgument - If custom strategies passed try to draw examples which together cannot create a valid DataArray. - """ - - _name = draw(st.none() | name) - - # TODO add a coords argument? 
- - if data is not None and dims is None: - # no dims -> generate dims to match data - _data = draw(data) - dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) - dim_sizes: Mapping[Hashable, int] = { - n: l for n, l in zip(dim_names, _data.shape) - } - coords = draw(coordinate_variables(dim_sizes=dim_sizes)) - - elif data is None and dims is not None: - # no data -> generate data to match dims - _dims = draw(dims) - if isinstance(_dims, Sequence): - dim_sizes = draw(_sizes_from_dim_names(_dims)) - elif isinstance(_dims, Mapping): - # should be a mapping of form {dim_names: lengths} - dim_sizes = _dims - else: - raise ValueError(f"Invalid type for dims argument - got type {type(_dims)}") - - dim_names, shape = list(dim_sizes.keys()), tuple(dim_sizes.values()) - _data = draw(np_arrays(shape=shape)) - coords = draw(coordinate_variables(dim_sizes=dim_sizes)) - - elif data is not None and dims is not None: - # both data and dims provided -> check drawn examples are compatible - _dims = draw(dims) - _data = draw(data) - if isinstance(_dims, Sequence): - dim_names = list(_dims) - if _data.ndim != len(_dims): - raise InvalidArgument( - f"Strategy attempting to generate data with {_data.ndim} dims but {len(_dims)} " - "unique dimension names. Please only pass strategies which are guaranteed to " - "draw compatible examples for data and dims." - ) - dim_sizes = {n: l for n, l in zip(_dims, _data.shape)} - elif isinstance(_dims, Mapping): - # should be a mapping of form {dim_names: lengths} - dim_sizes = _dims - dim_names, shape = list(dim_sizes.keys()), tuple(dim_sizes.values()) - if _data.shape != shape: - raise InvalidArgument( - f"Strategy attempting to generate data with shape {_data.shape} dims but dimension " - f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " - "draw compatible examples for data and dims." 
- ) - else: - raise ValueError(f"Invalid type for dims argument - got type {type(_dims)}") - - coords = draw(coordinate_variables(dim_sizes=dim_sizes)) - - else: - # nothing provided, so generate everything consistently by drawing dims to match data, and coords to match both - _data = draw(np_arrays()) - dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) - dim_sizes = {n: l for n, l in zip(dim_names, _data.shape)} - coords = draw(coordinate_variables(dim_sizes=dim_sizes)) - - return xr.DataArray( - data=_data, - coords=coords, - name=_name, - dims=dim_names, - attrs=draw(attrs), - ) - - -@st.composite -def data_variables( - draw: st.DrawFn, - *, - dim_sizes: Mapping[Hashable, int], - var_names: st.SearchStrategy[Hashable] = names(), -) -> Mapping[Hashable, xr.Variable]: - """ - Generates dicts of alignable Variable objects for use as Dataset data variables. - - Requires the hypothesis package to be installed. - - Parameters - ---------- - dim_sizes: Mapping of str to int - Sizes of dimensions to use for variables. - var_names: Strategy generating strings - Allowed names for data variables. Needed to avoid conflict with names of coordinate variables & dimensions. 
- """ - if draw( - st.booleans() - ): # Allow for no coordinate variables - explicit possibility not to helps with shrinking - dim_names = list(dim_sizes.keys()) - - # can't have same name as a dimension - # TODO this is also used in coordinate_variables so refactor it out into separate function - valid_var_names = var_names.filter(lambda n: n not in dim_names) - data_vars = draw( - _alignable_variables(var_names=valid_var_names, dim_sizes=dim_sizes) - ) - else: - data_vars = {} - - return data_vars - - -@st.composite -def datasets( - draw: st.DrawFn, - *, - data_vars: st.SearchStrategy[Mapping[Hashable, xr.Variable]] = None, - dims: st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]] = None, - attrs: st.SearchStrategy[Mapping] = attrs(), -) -> xr.Dataset: - """ - Generates arbitrary xarray.Dataset objects. - - Follows the basic signature of the xarray.Dataset constructor, but you can also pass alternative strategies to - generate either numpy-like array data variables or dimensions. - - Passing nothing will generate a completely arbitrary Dataset (backed by numpy arrays). - - Requires the hypothesis package to be installed. - - Parameters - ---------- - data_vars: Strategy generating mappings from variable names to xr.Variable objects, optional - Default is to generate an arbitrary combination of compatible variables with sizes matching dims, - but arbitrary names, dtypes, and values. - dims: Strategy for generating the dimensions, optional - Can either be a strategy for generating a sequence of string dimension names, - or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. - If provided in the former form the lengths of the returned Variable will either be determined from the - data argument if given or arbitrarily generated if not. - Default is to generate arbitrary dimension sizes. 
- attrs: Strategy which generates dicts, optional - - Raises - ------ - hypothesis.errors.InvalidArgument - If custom strategies passed try to draw examples which together cannot create a valid DataArray. - """ - - # TODO add a coords argument? - - if data_vars is not None and dims is None: - # no dims -> generate dims to match data - _data_vars = draw(data_vars) - dim_sizes = _find_overall_sizes(_data_vars) - # only draw coordinate variables whose names don't conflict with data variables - allowed_coord_names = names().filter(lambda n: n not in list(_data_vars.keys())) - coords = draw( - coordinate_variables(coord_names=allowed_coord_names, dim_sizes=dim_sizes) - ) - - elif data_vars is None and dims is not None: - # no data -> generate data to match dims - _dims = draw(dims) - if isinstance(_dims, Sequence): - dim_sizes = draw(_sizes_from_dim_names(_dims)) - elif isinstance(_dims, Mapping): - # should be a mapping of form {dim_names: lengths} - dim_sizes = _dims - else: - raise ValueError(f"Invalid type for dims argument - got type {type(_dims)}") - - coords = draw(coordinate_variables(dim_sizes=dim_sizes)) - coord_names = list(coords.keys()) - allowed_data_var_names = names().filter(lambda n: n not in coord_names) - _data_vars = draw( - data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names) - ) - - elif data_vars is not None and dims is not None: - # both data and dims provided -> check drawn examples are compatible - _dims = draw(dims) - if isinstance(_dims, Sequence): - # TODO support dims as list too? 
- raise NotImplementedError() - elif isinstance(_dims, Mapping): - # should be a mapping of form {dim_names: lengths} - dim_sizes = _dims - _data_vars = draw(data_vars) - _check_compatible_sizes(_data_vars, dim_sizes) - else: - raise ValueError(f"Invalid type for dims argument - got type {type(_dims)}") - - # only draw coordinate variables whose names don't conflict with data variables - allowed_coord_names = names().filter(lambda n: n not in list(_data_vars.keys())) - coords = draw( - coordinate_variables(coord_names=allowed_coord_names, dim_sizes=dim_sizes) - ) - - else: - # nothing provided, so generate everything consistently by drawing data to match dims, and coords to match both - dim_sizes = draw(dimension_sizes()) - coords = draw(coordinate_variables(dim_sizes=dim_sizes)) - allowed_data_var_names = names().filter(lambda n: n not in list(coords.keys())) - _data_vars = draw( - data_variables(dim_sizes=dim_sizes, var_names=allowed_data_var_names) - ) - - return xr.Dataset(data_vars=_data_vars, coords=coords, attrs=draw(attrs)) - - -def _find_overall_sizes(vars: Mapping[Hashable, xr.Variable]) -> Mapping[Hashable, int]: - """Given a set of variables, find their common sizes.""" - # TODO raise an error if inconsistent (i.e. if different values appear under same key) - # TODO narrow type by checking if values are not ints - sizes_dicts = [v.sizes for v in vars.values()] - dim_sizes = {d: s for dim_sizes in sizes_dicts for d, s in dim_sizes.items()} - return dim_sizes - - -def _check_compatible_sizes( - vars: Mapping[Hashable, xr.Variable], dim_sizes: Mapping[Hashable, int] -): - """Check set of variables have sizes compatible with given dim_sizes. If not raise InvalidArgument error.""" - - for name, v in vars.items(): - if not set(v.sizes.items()).issubset(set(dim_sizes.items())): - raise InvalidArgument( - f"Strategy attempting to generate object with dimension sizes {dim_sizes} but drawn " - f"variable {name} has sizes {v.sizes}, which is incompatible." 
- "Please only pass strategies which are guaranteed to draw compatible examples for data " - "and dims." - ) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 4c628be168d..8441d83e206 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -1,5 +1,3 @@ -import contextlib - import numpy as np import numpy.testing as npt import pytest @@ -9,17 +7,12 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st -from hypothesis import Phase, given, settings +from hypothesis import given from hypothesis.errors import InvalidArgument -from xarray import DataArray, Dataset from xarray.core.variable import Variable from xarray.testing.strategies import ( attrs, - coordinate_variables, - data_variables, - dataarrays, - datasets, dimension_names, dimension_sizes, np_arrays, @@ -167,181 +160,3 @@ def test_given_fixed_sizes_and_arbitrary_data(self, data): var = data.draw(variables(data=arrs, dims=st.just({"x": 2, "y": 3}))) assert var.shape == (2, 3) - - -class TestCoordinateVariablesStrategy: - @given(coordinate_variables(dim_sizes={"x": 2, "y": 3})) - def test_alignable(self, coord_vars): - - # TODO there must be a better way of checking align-ability than this - for v in coord_vars.values(): - if "x" in v.dims: - assert v.sizes["x"] == 2 - if "y" in v.dims: - assert v.sizes["y"] == 3 - if not set(v.dims).issubset({"x", "y"}): - assert False, v - - @given(st.data()) - def test_valid_set_of_coords(self, data): - coord_vars = data.draw(coordinate_variables(dim_sizes={"x": 2, "y": 3})) - - arr = data.draw(np_arrays(shape=(2, 3))) - da = DataArray(data=arr, coords=coord_vars, dims=["x", "y"]) - assert isinstance(da, DataArray) - - def test_sometimes_generates_1d_dim_coords(self): - found_one = False - - @given(st.data()) - @settings(phases=[Phase.generate]) - def inner(data): - coord_vars = data.draw(coordinate_variables(dim_sizes={"x": 2, "y": 3})) - for name, var in coord_vars.items(): - if 
var.ndim == 1 and name == var.dims[0]: - nonlocal found_one - found_one = True - raise AssertionError # early stopping - test is correct but slower without this - - with contextlib.suppress(AssertionError): - inner() - - assert found_one - - def test_sometimes_generates_non_dim_coords(self): - found_one = False - - @given(st.data()) - @settings(phases=[Phase.generate]) - def inner(data): - coord_vars = data.draw(coordinate_variables(dim_sizes={"x": 2, "y": 3})) - for name, var in coord_vars.items(): - if var.ndim != 1 or (var.ndim == 1 and name != var.dims[0]): - nonlocal found_one - found_one = True - raise AssertionError # early stopping - test is correct but slower without this - - with contextlib.suppress(AssertionError): - inner() - - assert found_one - - @given(st.data()) - def test_restrict_names(self, data): - capitalized_names = st.text(st.characters(), min_size=1).map(str.upper) - coord_vars = data.draw( - coordinate_variables( - dim_sizes={"x": 2, "y": 3}, coord_names=capitalized_names - ) - ) - for name in coord_vars.keys(): - if name not in ["x", "y"]: - assert name.upper() == name - - -class TestDataArraysStrategy: - @given(dataarrays()) - def test_given_nothing(self, da): - assert isinstance(da, DataArray) - - @given(st.data()) - def test_given_dims(self, data): - da = data.draw(dataarrays(dims=st.just(["x", "y"]))) - assert da.dims == ("x", "y") - - da = data.draw(dataarrays(dims=st.just({"x": 2, "y": 3}))) - assert da.sizes == {"x": 2, "y": 3} - - @given(st.data()) - def test_given_data(self, data): - shape = (2, 3) - arrs = np_arrays(shape=shape) - da = data.draw(dataarrays(data=arrs)) - - assert da.shape == shape - - @given(st.data()) - def test_given_data_and_dims(self, data): - arrs = np_arrays(shape=(2, 3)) - dims = dimension_names(min_dims=2, max_dims=2) - da = data.draw(dataarrays(data=arrs, dims=dims)) - assert da.shape == (2, 3) - - dims = dimension_names(min_dims=3, max_dims=3) - with pytest.raises(InvalidArgument): - 
data.draw(dataarrays(data=arrs, dims=dims)) - - arrs = np_arrays(shape=(3, 4)) - dims = st.just({"x": 3, "y": 4}) - da = data.draw(dataarrays(data=arrs, dims=dims)) - assert da.sizes == {"x": 3, "y": 4} - - -class TestDataVariablesStrategy: - @given(st.data()) - def test_given_only_sizes(self, data): - dim_sizes = {"x": 2, "y": 3} - data_vars = data.draw(data_variables(dim_sizes=dim_sizes)) - for k, v in data_vars.items(): - assert isinstance(v, Variable) - assert set(v.sizes.items()).issubset(set(dim_sizes.items())) - - @given(st.data()) - def test_restrict_names(self, data): - capitalized_names = st.text(st.characters(), min_size=1).map(str.upper) - data_vars = data.draw( - data_variables(dim_sizes={"x": 2, "y": 3}, var_names=capitalized_names) - ) - for name in data_vars.keys(): - assert name.upper() == name - - -class TestDatasetsStrategy: - @given(datasets()) - def test_given_nothing(self, ds): - assert isinstance(ds, Dataset) - - @given(st.data()) - def test_given_data(self, data): - dim_sizes = {"x": 3, "y": 4} - data_vars = data.draw(data_variables(dim_sizes=dim_sizes)) - ds = data.draw(datasets(data_vars=st.just(data_vars))) - assert set(ds.sizes.items()).issubset(set(dim_sizes.items())) - - @given(st.data()) - def test_given_dims(self, data): - dims = ["x", "y"] - ds = data.draw(datasets(dims=st.just(dims))) - assert set(ds.dims).issubset(set(dims)) - - dim_sizes = {"x": 3, "y": 4} - ds = data.draw(datasets(dims=st.just(dim_sizes))) - assert set(ds.sizes.items()).issubset(set(dim_sizes.items())) - - @given(st.data()) - def test_given_data_and_dims(self, data): - - # pass dims as mapping - dim_sizes = {"x": 3, "y": 4} - data_vars = data.draw(data_variables(dim_sizes=dim_sizes)) - ds = data.draw(datasets(data_vars=st.just(data_vars), dims=st.just(dim_sizes))) - assert set(ds.sizes.items()).issubset(set(dim_sizes.items())) - - incompatible_dim_sizes = {"x": 1, "y": 4} - data_vars = {"foo": Variable(data=[0, 1, 2], dims="x")} - with 
pytest.raises(InvalidArgument, match="drawn variable"): - data.draw( - datasets( - data_vars=st.just(data_vars), dims=st.just(incompatible_dim_sizes) - ) - ) - - @pytest.mark.xfail(reason="not implemented") - @given(st.data()) - def test_given_data_and_dims_as_sequence(self, data): - # pass dims as sequence - dim_sizes = {"x": 3, "y": 4} - dims = list(dim_sizes.keys()) - data_vars = data.draw(data_variables(dim_sizes=dim_sizes)) - ds = data.draw(datasets(data_vars=st.just(data_vars), dims=st.just(dims))) - assert set(ds.sizes.items()).issubset(set(dim_sizes.items())) From df3341e92bbbc454c207cf4aa5515485238873d7 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 08:59:31 -0400 Subject: [PATCH 089/155] variable strategy now accepts callable generating array strategies --- xarray/testing/__init__.py | 2 +- xarray/testing/strategies.py | 86 ++++++++++++++++-------------------- 2 files changed, 39 insertions(+), 49 deletions(-) diff --git a/xarray/testing/__init__.py b/xarray/testing/__init__.py index c9d1f9b1790..7f35a2b2be2 100644 --- a/xarray/testing/__init__.py +++ b/xarray/testing/__init__.py @@ -1,4 +1,4 @@ -from .testing import ( # noqa: F401 +from xarray.testing.testing import ( # noqa: F401 _assert_dataarray_invariants, _assert_dataset_invariants, _assert_indexes_invariants_checks, diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 4a2d03cf467..70d5f9626e8 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,12 +1,12 @@ from collections.abc import Hashable, Mapping, Sequence -from typing import Any, Union +from typing import Any, Protocol, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np -from hypothesis.errors import InvalidArgument import xarray as xr +from xarray.core.types import T_DuckArray __all__ = [ "numeric_dtypes", @@ -18,6 +18,13 @@ ] +class ArrayStrategyFn(Protocol): + def __call__( + self, *, shape: tuple[int, ...] 
= None, dtype: np.dtype = None, **kwargs + ) -> st.SearchStrategy[T_DuckArray]: + ... + + # required to exclude weirder dtypes e.g. unicode, byte_string, array, or nested dtypes. def numeric_dtypes() -> st.SearchStrategy[np.dtype]: """ @@ -161,32 +168,30 @@ def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: ) -# Is there a way to do this in general? -# Could make a Protocol... -T_Array = Any - - @st.composite def variables( draw: st.DrawFn, *, - data: st.SearchStrategy[T_Array] = None, + array_strategy_fn: ArrayStrategyFn = None, dims: st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]] = None, + dtype: st.SearchStrategy[np.dtype] = numeric_dtypes(), attrs: st.SearchStrategy[Mapping] = attrs(), ) -> xr.Variable: """ Generates arbitrary xarray.Variable objects. - Follows the signature of the xarray.Variable constructor, but you can also pass alternative strategies to generate - either numpy-like array data or dimension names. + Follows the basic signature of the xarray.Variable constructor, but allows passing alternative strategies to + generate either numpy-like array data or dimensions. - Passing nothing will generate a completely arbitrary Variable (backed by a numpy array). + Passing nothing will generate a completely arbitrary Variable (containing a numpy array). Requires the hypothesis package to be installed. Parameters ---------- - data: Strategy generating array-likes, optional + array_strategy_fn: Callable which returns a strategy generating array-likes, optional + Callable must accept shape and dtype kwargs if passed. + If array_strategy_fn is not passed then the shapes will be derived from the dims kwarg. Default is to generate numpy data of arbitrary shape, values and dtype. 
dims: Strategy for generating the dimensions, optional Can either be a strategy for generating a sequence of string dimension names, @@ -194,8 +199,14 @@ def variables( If provided in the former form the lengths of the returned Variable will either be determined from the data argument if given or arbitrarily generated if not. Default is to generate arbitrary dimension names for each axis in data. + dtype: Strategy which generates np.dtype objects, optional attrs: Strategy which generates dicts, optional + Returns + ------- + variable_strategy + Strategy for generating xarray.Variable objects. + Raises ------ hypothesis.errors.InvalidArgument @@ -204,60 +215,39 @@ def variables( if any( not isinstance(arg, st.SearchStrategy) and arg is not None - for arg in [data, dims, attrs] + for arg in [dims, dtype, attrs] ): raise TypeError( "Contents must be provided as a hypothesis.strategies.SearchStrategy object (or None)." "To specify fixed contents, use hypothesis.strategies.just()." ) - if data is not None and dims is None: - # no dims -> generate dims to match data - _data = draw(data) - dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) + if not array_strategy_fn: + array_strategy_fn = np_arrays - elif dims is not None and data is None: - # no data -> generate data to match dims + if dims is not None: + # generate dims first then draw data to match _dims = draw(dims) if isinstance(_dims, Sequence): dim_names = list(_dims) valid_shapes = npst.array_shapes(min_dims=len(_dims), max_dims=len(_dims)) - _data = draw(np_arrays(shape=draw(valid_shapes))) - elif isinstance(_dims, Mapping): - # should be a mapping of form {dim_names: lengths} - dim_names, shape = list(_dims.keys()), tuple(_dims.values()) - _data = draw(np_arrays(shape=shape)) - else: - raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") - - elif data is not None and dims is not None: - # both data and dims provided -> check drawn examples are compatible - _dims = 
draw(dims) - _data = draw(data) - - if isinstance(_dims, Sequence): - dim_names = list(_dims) - if _data.ndim != len(_dims): - raise InvalidArgument( - f"Strategy attempting to generate data with {_data.ndim} dims but {len(_dims)} " - "unique dimension names. Please only pass strategies which are guaranteed to " - "draw compatible examples for data and dims." - ) + array_strategy = array_strategy_fn( + shape=draw(valid_shapes), dtype=draw(dtype) + ) elif isinstance(_dims, Mapping): # should be a mapping of form {dim_names: lengths} dim_names, shape = list(_dims.keys()), tuple(_dims.values()) - if _data.shape != shape: - raise InvalidArgument( - f"Strategy attempting to generate data with shape {_data.shape} dims but dimension " - f"sizes implying shape {shape}. Please only pass strategies which are guaranteed to " - "draw compatible examples for data and dims." - ) + array_strategy = array_strategy_fn(shape=shape, dtype=draw(dtype)) else: - raise ValueError(f"Invalid type for dims argument - got type {type(dims)}") + raise ValueError( + f"Invalid type returned by dims strategy - drew an object of type {type(dims)}" + ) + _data = draw(array_strategy) else: # nothing provided, so generate everything consistently by drawing dims to match data - _data = draw(np_arrays()) + array_strategy = array_strategy_fn(dtype=draw(dtype)) + _data = draw(array_strategy) dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) return xr.Variable(dims=dim_names, data=_data, attrs=draw(attrs)) From de4de8fd501e062dde7bd6311e94d6137ef524d2 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 09:16:21 -0400 Subject: [PATCH 090/155] use only readable unicode characters in names --- xarray/testing/strategies.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 70d5f9626e8..5f9c541c754 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py 
@@ -69,7 +69,14 @@ def names() -> st.SearchStrategy[str]: Requires the hypothesis package to be installed. """ - return st.text(st.characters(), min_size=1, max_size=5) + # TODO Generalize to all valid unicode characters after formatting bugs in xarray's reprs are fixed. + return st.text( + st.characters( + categories=["L", "N"], max_codepoint=0x017F + ), # only use characters within the "Latin Extended-A" subset of unicode + min_size=1, + max_size=5, + ) def dimension_names( @@ -211,6 +218,10 @@ def variables( ------ hypothesis.errors.InvalidArgument If custom strategies passed try to draw examples which together cannot create a valid Variable. + + Examples + -------- + """ if any( From af14dc2c571cabbfa1aa853aae643e135dff4d1b Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 09:40:37 -0400 Subject: [PATCH 091/155] examples --- xarray/testing/strategies.py | 56 +++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 5f9c541c754..48a56c1bbdb 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -216,12 +216,60 @@ def variables( Raises ------ - hypothesis.errors.InvalidArgument - If custom strategies passed try to draw examples which together cannot create a valid Variable. + TypeError + If custom strategies passed try to draw examples which are not of the correct type. 
Examples -------- - + Generate completely arbitrary Variable objects backed by a numpy array: + + >>> variables().example() + + array([43506, -16, -151], dtype=int32) + >>> variables().example() + + + array([[[-10000000., -10000000.], + [-10000000., -10000000.]], + + [[-10000000., -10000000.], + [ 0., -10000000.]], + + [[ 0., -10000000.], + [-10000000., inf]], + + [[ -0., -10000000.], + [-10000000., -0.]]], dtype=float32) + + Generate only Variable objects with certain dimension names: + + >>> variables(dims=st.just(["a", "b"])).example() + + array([[ 248, 4294967295, 4294967295], + [2412855555, 3514117556, 4294967295], + [ 111, 4294967295, 4294967295], + [4294967295, 1084434988, 51688], + [ 47714, 252, 11207]], dtype=uint32) + + Generate only Variable objects with certain dimension names and lengths: + + >>> variables(dims=st.just({"a": 2, "b": 1})).example() + + array([[-1.00000000e+007+3.40282347e+038j], + [-2.75034266e-225+2.22507386e-311j]]) + + Generate completely arbitrary Variable objects backed by a sparse array: + + >>> from hypothesis.extra.array_api import make_strategies_namespace + >>> import cupy as cp + >>> cupy_strategy_fn = make_strategies_namespace(cp).arrays + >>> cupy_da = variables(array_strategy_fn=cupy_strategy_fn).example() + >>> cupy_da + + array([[ 0., 1., 2.], + [ 3., 4., 5.]], dtype=float32) + >>> cupy_da.data.device + """ if any( @@ -250,7 +298,7 @@ def variables( dim_names, shape = list(_dims.keys()), tuple(_dims.values()) array_strategy = array_strategy_fn(shape=shape, dtype=draw(dtype)) else: - raise ValueError( + raise TypeError( f"Invalid type returned by dims strategy - drew an object of type {type(dims)}" ) _data = draw(array_strategy) From d001dbb2c0b22c3884aed30a18d0639a5b3f2c5b Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 09:49:23 -0400 Subject: [PATCH 092/155] only use unicode characters that docs can deal with --- xarray/testing/strategies.py | 24 +++++++++++++++--------- 1 file changed, 15 
insertions(+), 9 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 48a56c1bbdb..25eb0c62983 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -63,17 +63,20 @@ def np_arrays( return npst.arrays(dtype=dtype, shape=shape) +# TODO Generalize to all valid unicode characters once formatting bugs in xarray's reprs are fixed + docs can handle it. +_readable_characters = st.characters( + categories=["L", "N"], max_codepoint=0x017F +) # only use characters within the "Latin Extended-A" subset of unicode + + def names() -> st.SearchStrategy[str]: """ Generates arbitrary string names for dimensions / variables. Requires the hypothesis package to be installed. """ - # TODO Generalize to all valid unicode characters after formatting bugs in xarray's reprs are fixed. return st.text( - st.characters( - categories=["L", "N"], max_codepoint=0x017F - ), # only use characters within the "Latin Extended-A" subset of unicode + _readable_characters, min_size=1, max_size=5, ) @@ -148,16 +151,18 @@ def dimension_sizes( ) -_attr_keys = st.text(st.characters()) +_readable_strings = st.text( + _readable_characters, + max_size=5, +) +_attr_keys = _readable_strings _small_arrays = np_arrays( shape=npst.array_shapes( max_side=2, max_dims=2, ) ) -_attr_values = ( - st.none() | st.booleans() | st.text(st.characters(), max_size=5) | _small_arrays -) +_attr_values = st.none() | st.booleans() | _readable_strings | _small_arrays def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: @@ -227,7 +232,6 @@ def variables( array([43506, -16, -151], dtype=int32) >>> variables().example() - array([[[-10000000., -10000000.], [-10000000., -10000000.]], @@ -240,6 +244,8 @@ def variables( [[ -0., -10000000.], [-10000000., -0.]]], dtype=float32) + Attributes: + śřĴ: {'ĉ': {'iĥſ': array([-30117, -1740], dtype=int16)}} Generate only Variable objects with certain dimension names: From d4c9cb5ba021e05410c6a4794c543b25363f1782 Mon Sep 17 
00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 10:18:53 -0400 Subject: [PATCH 093/155] docs: dataarrays -> variables --- doc/user-guide/testing.rst | 71 ++++++++++++-------------------------- 1 file changed, 22 insertions(+), 49 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 6cb409788e9..b78e913778c 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -69,9 +69,9 @@ which is a general hypothesis method valid for all strategies. import xarray.testing.strategies as xrst - xrst.dataarrays().example() - xrst.dataarrays().example() - xrst.dataarrays().example() + xrst.variables().example() + xrst.variables().example() + xrst.variables().example() You can see that calling ``.example()`` multiple times will generate different examples, giving you an idea of the wide range of data that the xarray strategies can generate. @@ -85,9 +85,9 @@ In your tests however you should not use ``.example()`` - instead you should par .. ipython:: python - @given(xrst.dataarrays()) - def test_function_that_acts_on_dataarrays(da): - assert func(da) == ... + @given(xrst.variables()) + def test_function_that_acts_on_variables(var): + assert func(var) == ... Chaining Strategies @@ -98,39 +98,13 @@ examples. .. ipython:: python - # generate a DataArray with shape (3, 4), but all other details still arbitrary - xrst.dataarrays( - data=xrst.np_arrays(shape=(3, 4), dtype=np.dtype("int32")) - ).example() + # generate a Variable containing an array with a complex number dtype, but all other details still arbitrary + from hypothesis.extra.numpy import complex_number_dtypes -This also works with custom strategies, or strategies defined in other packages. -For example you could create a ``chunks`` strategy to specify particular chunking patterns for a dask-backed array. - -.. warning:: - When passing multiple different strategies to the same constructor the drawn examples must be mutually compatible. 
- - In order to construct a valid xarray object to return, our strategies must check that the - variables / dimensions / coordinates are mutually compatible. If you pass multiple custom strategies to a strategy - constructor which are not compatible in all cases, an error will be raised, *even if they are still compatible in - other cases*. For example - - .. code-block:: - - @st.given(st.data()) - def test_something_else_inefficiently(data): - arrs = npst.arrays(dtype=numeric_dtypes) # generates arrays of any shape - dims = xrst.dimension_names() # generates lists of any number of dimensions - - # Drawing examples from this strategy will raise a hypothesis.errors.InvalidArgument error. - var = data.draw(xrst.variables(data=arrs, dims=dims)) - - assert ... - - Here we have passed custom strategies which won't often be compatible: only rarely will the array's ``ndims`` - correspond to the number of dimensions drawn. We forbid arguments that are only *sometimes* compatible in order to - avoid extremely poor example generation performance (as generating invalid examples and rejecting them is - potentially unboundedly inefficient). + xrst.variables(dtype=complex_number_dtypes()).example() +This also works with custom strategies, or strategies defined in other packages. +For example you could imagine creating a ``chunks`` strategy to specify particular chunking patterns for a dask-backed array. Fixing Arguments ~~~~~~~~~~~~~~~~ @@ -142,8 +116,8 @@ over all other aspects, then use :py:func:`hypothesis.strategies.just()`. import hypothesis.strategies as st - # Generates only dataarrays with dimensions ["x", "y"] - xrst.dataarrays(dims=st.just(["x", "y"])).example() + # Generates only variable objects with dimensions ["x", "y"] + xrst.variables(dims=st.just(["x", "y"])).example() (This is technically another example of chaining strategies - :py:func:`hypothesis.strategies.just()` is simply a special strategy that just contains a single example.) 
@@ -153,15 +127,15 @@ To fix the length of dimensions you can instead pass ``dims`` as a mapping of di .. ipython:: python - # Generates only dataarrays with dimensions ["x", "y"], of lengths 2 & 3 respectively - xrst.dataarrays(dims=st.just({"x": 2, "y": 3})).example() + # Generates only variables with dimensions ["x", "y"], of lengths 2 & 3 respectively + xrst.variables(dims=st.just({"x": 2, "y": 3})).example() You can also use this to specify that you want examples which are missing some part of the data structure, for instance .. ipython:: python - # Generates only dataarrays with no coordinates - xrst.datasets(data_vars=st.just({})).example() + # Generates a Variable with no attributes + xrst.variables(attrs=st.just({})).example() Through a combination of chaining strategies and fixing arguments, you can specify quite complicated requirements on the objects your chained strategy will generate. @@ -171,20 +145,19 @@ objects your chained strategy will generate. fixed_x_variable_y_maybe_z = st.fixed_dictionaries( {"x": st.just(2), "y": st.integers(3, 4)}, optional={"z": st.just(2)} ) - fixed_x_variable_y_maybe_z.example() - special_dataarrays = xrst.dataarrays(dims=fixed_x_variable_y_maybe_z) + special_variables = xrst.variables(dims=fixed_x_variable_y_maybe_z) - special_dataarrays.example() - special_dataarrays.example() + special_variables.example() + special_variables.example() Here we have used one of hypothesis' built-in strategies :py:func:`hypothesis.strategies.fixed_dictionaries` to create a strategy which generates mappings of dimension names to lengths (i.e. the ``size`` of the xarray object we want). This particular strategy will always generate an ``x`` dimension of length 2, and a ``y`` dimension of length either 3 or 4, and will sometimes also generate a ``z`` dimension of length 2. 
-By feeding this strategy for dictionaries into the ``dims`` argument of xarray's :py:func:`~st.dataarrays` strategy, -we can generate arbitrary :py:class:`~xarray.DataArray` objects whose dimensions will always match these specifications. +By feeding this strategy for dictionaries into the ``dims`` argument of xarray's :py:func:`~st.variables` strategy, +we can generate arbitrary :py:class:`~xarray.Variable` objects whose dimensions will always match these specifications. Creating Duck-type Arrays From 7983e34234395379af953ca6c97e5567110721e8 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 12:12:33 -0400 Subject: [PATCH 094/155] update tests for variables strategy --- xarray/tests/test_strategies.py | 98 +++++++++++++++++++++------------ 1 file changed, 63 insertions(+), 35 deletions(-) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 8441d83e206..8c6812e3b16 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -8,7 +8,6 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st from hypothesis import given -from hypothesis.errors import InvalidArgument from xarray.core.variable import Variable from xarray.testing.strategies import ( @@ -98,39 +97,18 @@ def test_given_nothing(self, var): assert isinstance(var, Variable) @given(st.data()) - def test_given_fixed_dims_list_and_fixed_data(self, data): - dims = ["x", "y"] - arr = np.asarray([[1, 2], [3, 4]]) - var = data.draw(variables(dims=st.just(dims), data=st.just(arr))) - - assert list(var.dims) == dims - npt.assert_equal(var.data, arr) - - @given(st.data()) - def test_given_arbitrary_dims_list_and_arbitrary_data(self, data): - arrs = np_arrays(shape=(2, 3)) - dims = dimension_names(min_dims=2, max_dims=2) - var = data.draw(variables(data=arrs, dims=dims)) - assert var.shape == (2, 3) - - dims = dimension_names(min_dims=3) - with pytest.raises(InvalidArgument): - data.draw(variables(data=arrs, dims=dims)) + 
def test_given_incorrect_types(self, data): + with pytest.raises(TypeError, match="SearchStrategy object"): + data.draw(variables(dims=["x", "y"])) - @given(st.data()) - def test_given_fixed_data(self, data): - arr = np.asarray([[1, 2], [3, 4]]) - var = data.draw(variables(data=st.just(arr))) + with pytest.raises(TypeError, match="SearchStrategy object"): + data.draw(variables(dtype=np.dtype("int32"))) - npt.assert_equal(var.data, arr) + with pytest.raises(TypeError, match="SearchStrategy object"): + data.draw(variables(attrs=dict())) - @given(st.data()) - def test_given_arbitrary_data(self, data): - shape = (2, 3) - arrs = np_arrays(shape=shape) - var = data.draw(variables(data=arrs)) - - assert var.data.shape == shape + with pytest.raises(TypeError, match="Callable"): + data.draw(variables(array_strategy_fn=np.array([0]))) @given(st.data()) def test_given_fixed_dims_list(self, data): @@ -155,8 +133,58 @@ def test_given_fixed_sizes(self, data): assert var.shape == (3, 4) @given(st.data()) - def test_given_fixed_sizes_and_arbitrary_data(self, data): - arrs = np_arrays(shape=(2, 3)) - var = data.draw(variables(data=arrs, dims=st.just({"x": 2, "y": 3}))) + def test_given_fixed_dtype(self, data): + var = data.draw(variables(dtype=st.just(np.dtype("int32")))) + + assert var.dtype == np.dtype("int32") + + @given(st.data()) + def test_given_fixed_data(self, data): + arr = np.asarray([[1, 2], [3, 4]]) + + def fixed_array_strategy_fn(*, shape=None, dtype=None): + return st.just(arr) + + var = data.draw( + variables( + array_strategy_fn=fixed_array_strategy_fn, dtype=st.just(arr.dtype) + ) + ) + + npt.assert_equal(var.data, arr) + assert var.dtype == arr.dtype + + @given(st.data()) + def test_given_fixed_dims_and_fixed_data(self, data): + dims = {"x": 2, "y": 2} + arr = np.asarray([[1, 2], [3, 4]]) + + def fixed_array_strategy_fn(*, shape=None, dtype=None): + return st.just(arr) + + var = data.draw( + variables( + array_strategy_fn=fixed_array_strategy_fn, + 
dims=st.just(dims), + dtype=st.just(arr.dtype), + ) + ) + + assert var.sizes == dims + npt.assert_equal(var.data, arr) + + @given(st.data()) + def test_given_fixed_shape_arbitrary_dims_and_arbitrary_data(self, data): + dims = dimension_names(min_dims=2, max_dims=2) + + def fixed_shape_array_strategy_fn(*, shape=None, dtype=None): + return np_arrays(shape=shape, dtype=dtype) + + var = data.draw( + variables( + array_strategy_fn=fixed_shape_array_strategy_fn, + dims=dims, + ) + ) - assert var.shape == (2, 3) + assert var.ndim == 2 From 2ad7bb0503f5303ebcaf835fae897caafcbf000d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 12:20:02 -0400 Subject: [PATCH 095/155] test values in attrs dict --- xarray/testing/strategies.py | 51 ++++++++++++++++++++++++--------- xarray/tests/test_strategies.py | 18 +++++++++++- 2 files changed, 55 insertions(+), 14 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 25eb0c62983..33438196286 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -193,7 +193,8 @@ def variables( Generates arbitrary xarray.Variable objects. Follows the basic signature of the xarray.Variable constructor, but allows passing alternative strategies to - generate either numpy-like array data or dimensions. + generate either numpy-like array data or dimensions. Also allows specifying the shape or dtype of the wrapped array + up front. Passing nothing will generate a completely arbitrary Variable (containing a numpy array). @@ -204,15 +205,17 @@ def variables( array_strategy_fn: Callable which returns a strategy generating array-likes, optional Callable must accept shape and dtype kwargs if passed. If array_strategy_fn is not passed then the shapes will be derived from the dims kwarg. - Default is to generate numpy data of arbitrary shape, values and dtype. + Default is to generate a numpy array of arbitrary shape, values and dtype. 
dims: Strategy for generating the dimensions, optional Can either be a strategy for generating a sequence of string dimension names, or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. - If provided in the former form the lengths of the returned Variable will either be determined from the - data argument if given or arbitrarily generated if not. + If provided as a mapping the array shape will be passed to array_strategy_fn. Default is to generate arbitrary dimension names for each axis in data. dtype: Strategy which generates np.dtype objects, optional + Will be passed in to array_strategy_fn. attrs: Strategy which generates dicts, optional + Default is to generate a nested attributes dictionary containing arbitrary strings, booleans, integers, Nones, + and numpy arrays. Returns ------- @@ -222,7 +225,7 @@ def variables( Raises ------ TypeError - If custom strategies passed try to draw examples which are not of the correct type. + If custom strategies passed attempt to draw any examples which are not of the correct type. Examples -------- @@ -283,12 +286,19 @@ def variables( for arg in [dims, dtype, attrs] ): raise TypeError( - "Contents must be provided as a hypothesis.strategies.SearchStrategy object (or None)." + "Contents dims, dtype, and attrs must each be provided as a hypothesis.strategies.SearchStrategy object (or None)." "To specify fixed contents, use hypothesis.strategies.just()." ) - if not array_strategy_fn: + if array_strategy_fn is None: array_strategy_fn = np_arrays + elif not callable(array_strategy_fn): + raise TypeError( + "array_strategy_fn must be a Callable that accepts the kwargs dtype and shape and returns a hypothesis " + "strategy which generates corresponding array-like objects." 
+ ) + + _dtype = draw(dtype) if dims is not None: # generate dims first then draw data to match @@ -296,23 +306,38 @@ def variables( if isinstance(_dims, Sequence): dim_names = list(_dims) valid_shapes = npst.array_shapes(min_dims=len(_dims), max_dims=len(_dims)) - array_strategy = array_strategy_fn( - shape=draw(valid_shapes), dtype=draw(dtype) - ) + _shape = draw(valid_shapes) + array_strategy = array_strategy_fn(shape=_shape, dtype=_dtype) elif isinstance(_dims, Mapping): # should be a mapping of form {dim_names: lengths} - dim_names, shape = list(_dims.keys()), tuple(_dims.values()) - array_strategy = array_strategy_fn(shape=shape, dtype=draw(dtype)) + dim_names, _shape = list(_dims.keys()), tuple(_dims.values()) + array_strategy = array_strategy_fn(shape=_shape, dtype=_dtype) else: raise TypeError( f"Invalid type returned by dims strategy - drew an object of type {type(dims)}" ) + _data = draw(array_strategy) + if _data.shape != _shape: + raise TypeError( + "array_strategy_fn returned an array object with a different shape than it was passed." + f"Passed {_data.shape}, but returned {_shape}." + "Please either specify a consistent shape via the dims kwarg or ensure the array_strategy_fn callable " + "obeys the shape argument passed to it." + ) else: # nothing provided, so generate everything consistently by drawing dims to match data - array_strategy = array_strategy_fn(dtype=draw(dtype)) + array_strategy = array_strategy_fn(dtype=_dtype) _data = draw(array_strategy) dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) + if _data.dtype != _dtype: + raise TypeError( + "array_strategy_fn returned an array object with a different dtype than it was passed." + f"Passed {_data.dtype}, but returned {_dtype}" + "Please either specify a consistent dtype via the dtype kwarg or ensure the array_strategy_fn callable " + "obeys the dtype argument passed to it." 
+ ) + return xr.Variable(dims=dim_names, data=_data, attrs=draw(attrs)) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 8c6812e3b16..c2d002014f5 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -19,6 +19,8 @@ variables, ) +ALLOWED_ATTRS_VALUES_TYPES = (int, bool, str, np.ndarray) + class TestNumpyArraysStrategy: @given(np_arrays()) @@ -84,11 +86,25 @@ def test_restrict_names(self, data): assert dim.upper() == dim +def check_dict_values(dictionary: dict) -> bool: + for key, value in dictionary.items(): + if isinstance(value, ALLOWED_ATTRS_VALUES_TYPES) or value is None: + continue + elif isinstance(value, dict): + # If the value is a dictionary, recursively check it + if not check_dict_values(value): + return False + else: + # If the value is not an integer or a dictionary, it's not valid + return False + return True + + class TestAttrsStrategy: @given(attrs()) def test_type(self, attrs): assert isinstance(attrs, dict) - # TODO how to test the types of values in a recursive object? + check_dict_values(attrs) class TestVariablesStrategy: From a9f7cd5d58c336ec8da0cd37120249daf2708f34 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 12:53:00 -0400 Subject: [PATCH 096/155] duck array type examples --- doc/user-guide/testing.rst | 77 ++++++++++++++++++------------------ xarray/testing/strategies.py | 2 +- 2 files changed, 39 insertions(+), 40 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index b78e913778c..9328268c584 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -166,10 +166,10 @@ Creating Duck-type Arrays Xarray objects don't have to wrap numpy arrays, in fact they can wrap any array type which presents the same API as a numpy array (so-called "duck array wrapping", see :ref:`wrapping numpy-like arrays `). 
-Imagine we want to write a strategy which generates arbitrary ``DataArray`` objects, each of which wraps a +Imagine we want to write a strategy which generates arbitrary ``Variable`` objects, each of which wraps a :py:class:`sparse.COO` array instead of a ``numpy.ndarray``. How could we do that? There are two ways: -1. Create a xarray object with numpy data and use ``.map()`` to convert the underlying array to a +1. Create a xarray object with numpy data and use the hypothesis' ``.map()`` method to convert the underlying array to a different type: .. ipython:: python @@ -180,68 +180,67 @@ different type: .. ipython:: python :okexcept: - def convert_to_sparse(da): - if da.ndim == 0: - return da + def convert_to_sparse(var): + if var.ndim == 0: + return var else: - da.data = sparse.COO.from_numpy(da.values) - return da + var.data = sparse.COO.from_numpy(da.values) + return var .. ipython:: python :okexcept: - sparse_dataarrays = xrst.dataarrays().map(convert_to_sparse) + sparse_variables = xrst.variables().map(convert_to_sparse) - sparse_dataarrays.example() - sparse_dataarrays.example() + sparse_variables.example() + sparse_variables.example() -2. Pass a strategy which generates the duck-typed arrays directly to the ``data`` argument of the xarray -strategies: +2. Pass a function which returns a strategy which generates the duck-typed arrays directly to the ``array_strategy_fn`` argument of the xarray strategies: .. 
ipython:: python :okexcept: @st.composite - def sparse_arrays(draw) -> st.SearchStrategy[sparse._coo.core.COO]: + def sparse_random_arrays( + draw, shape: tuple[int] = None + ) -> st.SearchStrategy[sparse._coo.core.COO]: """Strategy which generates random sparse.COO arrays""" - shape = draw(npst.array_shapes()) + if shape is None: + shape = draw(npst.array_shapes()) density = draw(st.integers(min_value=0, max_value=1)) - return sparse.random(shape, density=density) + return sparse.random( + shape=shape, density=density + ) # note sparse.random does not accept a dtype kwarg -.. ipython:: python - :okexcept: - - sparse_dataarrays = xrst.dataarrays(data=sparse_arrays()) - - sparse_dataarrays.example() - sparse_dataarrays.example() -Either approach is fine, but one may be more convenient than the other depending on the type of the duck array which you -want to wrap. + def sparse_random_arrays_fn( + *, shape: tuple[int] = None, dtype: np.dtype = None + ) -> st.SearchStrategy[sparse._coo.core.COO]: + return sparse_arrays(shape=shape) -Creating datasets can be a little more involved. Using method (1) is simple: .. ipython:: python :okexcept: - def convert_ds_to_sparse(ds): - return ds.map(convert_to_sparse) - -.. ipython:: python - :okexcept: - - sparse_datasets = xrst.datasets().map(convert_ds_to_sparse) + sparse_random_variables = xrst.variables( + array_strategy_fn=sparse_random_arrays_fn, dtype=st.just(np.dtype("float64")) + ) + sparse_random_variables.example() - sparse_datasets.example() +Either approach is fine, but one may be more convenient than the other depending on the type of the duck array which you +want to wrap. -but building a dataset from scratch (i.e. method (2)) requires building the dataset object in such as way that all of -the data variables have compatible dimensions. 
You can build up a dictionary of the form ``{var_name: data_variable}`` -yourself, or you can use the ``data_vars`` argument to the ``data_variables`` strategy (TODO): +If the array type you want to generate has a top-level namespace (e.g. that which is conventionally imported as ``xp`` or similar), +you can use this neat trick: .. ipython:: python :okexcept: - sparse_data_vars = xrst.data_variables(data=sparse_arrays()) - sparse_datasets = xrst.datasets(data_vars=sparse_data_vars) + import numpy.array_api as xp # available in numpy 1.26.0 + + from hypothesis.extra.array_api import make_strategies_namespace - sparse_datasets.example() + numpy_variables = xrst.variables( + array_strategy_fn=make_strategies_namespace(xp).arrays + ) + numpy_variables.example() diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 33438196286..8deaef15229 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -335,7 +335,7 @@ def variables( if _data.dtype != _dtype: raise TypeError( "array_strategy_fn returned an array object with a different dtype than it was passed." - f"Passed {_data.dtype}, but returned {_dtype}" + f"Passed {_dtype}, but returned {_data.dtype}" "Please either specify a consistent dtype via the dtype kwarg or ensure the array_strategy_fn callable " "obeys the dtype argument passed to it." ) From 49a1c644c89243c89d59c3e5f7f7457a3ba7bfef Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 12:54:28 -0400 Subject: [PATCH 097/155] altered whatsnew --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index bcf05a2bb72..db3ba0a25ce 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,7 +22,7 @@ v2023.10.2 (unreleased) New Features ~~~~~~~~~~~~ -- Added a suite of hypothesis strategies for generating xarray objects containing arbitrary data, useful for testing. 
+- Added hypothesis strategies for generating :py:class:`xarray.Variable` objects containing arbitrary data, useful for parametrizing downstream tests. Accessible under :py:func:`testing.strategies`, and documented in a new page on testing in the User Guide. (:issue:`6911`, :pull:`6908`) By `Tom Nicholas `_. From c1f19740ee7b0858f5f296f63322b336cec76bd4 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 13:11:14 -0400 Subject: [PATCH 098/155] maybe fix mypy --- xarray/testing/strategies.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 8deaef15229..f65ea2be298 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -20,7 +20,11 @@ class ArrayStrategyFn(Protocol): def __call__( - self, *, shape: tuple[int, ...] = None, dtype: np.dtype = None, **kwargs + self, + *, + shape: tuple[int, ...] | None = None, + dtype: np.dtype | None = None, + **kwargs, ) -> st.SearchStrategy[T_DuckArray]: ... From 6482ad391ece69b8c933c1ab912391773bce7c54 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 14:11:48 -0400 Subject: [PATCH 099/155] fix some mypy errors --- xarray/testing/strategies.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index f65ea2be298..ca3886f7610 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -22,8 +22,8 @@ class ArrayStrategyFn(Protocol): def __call__( self, *, - shape: tuple[int, ...] | None = None, - dtype: np.dtype | None = None, + shape: Union[tuple[int, ...], None] = None, + dtype: Union[np.dtype, None] = None, **kwargs, ) -> st.SearchStrategy[T_DuckArray]: ... 
@@ -118,7 +118,7 @@ def dimension_sizes( min_dims: int = 0, max_dims: int = 3, min_side: int = 1, - max_side: int = None, + max_side: Union[int, None] = None, ) -> st.SearchStrategy[Mapping[Hashable, int]]: """ Generates an arbitrary mapping from dimension names to lengths. @@ -188,8 +188,10 @@ def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: def variables( draw: st.DrawFn, *, - array_strategy_fn: ArrayStrategyFn = None, - dims: st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]] = None, + array_strategy_fn: Union[ArrayStrategyFn, None] = None, + dims: Union[ + st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]], None + ] = None, dtype: st.SearchStrategy[np.dtype] = numeric_dtypes(), attrs: st.SearchStrategy[Mapping] = attrs(), ) -> xr.Variable: @@ -294,13 +296,19 @@ def variables( "To specify fixed contents, use hypothesis.strategies.just()." ) + _array_strategy_fn: ArrayStrategyFn + array_strategy: st.SearchStrategy[T_DuckArray] if array_strategy_fn is None: - array_strategy_fn = np_arrays + _array_strategy_fn = np_arrays # type: ignore[assignment] elif not callable(array_strategy_fn): raise TypeError( "array_strategy_fn must be a Callable that accepts the kwargs dtype and shape and returns a hypothesis " "strategy which generates corresponding array-like objects." 
) + else: + _array_strategy_fn = ( + array_strategy_fn # satisfy mypy that this new variable cannot be None + ) _dtype = draw(dtype) @@ -311,11 +319,11 @@ def variables( dim_names = list(_dims) valid_shapes = npst.array_shapes(min_dims=len(_dims), max_dims=len(_dims)) _shape = draw(valid_shapes) - array_strategy = array_strategy_fn(shape=_shape, dtype=_dtype) + array_strategy = _array_strategy_fn(shape=_shape, dtype=_dtype) elif isinstance(_dims, Mapping): # should be a mapping of form {dim_names: lengths} dim_names, _shape = list(_dims.keys()), tuple(_dims.values()) - array_strategy = array_strategy_fn(shape=_shape, dtype=_dtype) + array_strategy = _array_strategy_fn(shape=_shape, dtype=_dtype) else: raise TypeError( f"Invalid type returned by dims strategy - drew an object of type {type(dims)}" @@ -332,7 +340,7 @@ def variables( else: # nothing provided, so generate everything consistently by drawing dims to match data - array_strategy = array_strategy_fn(dtype=_dtype) + array_strategy = _array_strategy_fn(dtype=_dtype) _data = draw(array_strategy) dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) From 95cab79370224eb2c6f0854baba352f4f04ee05b Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 14:52:12 -0400 Subject: [PATCH 100/155] more typing changes --- xarray/testing/strategies.py | 12 ++++++++---- xarray/tests/test_strategies.py | 14 +++++++------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index ca3886f7610..207b17e75c9 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,12 +1,15 @@ from collections.abc import Hashable, Mapping, Sequence -from typing import Any, Protocol, Union +from typing import TYPE_CHECKING, Any, Protocol, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np import xarray as xr -from xarray.core.types import T_DuckArray + +if TYPE_CHECKING: 
+ from xarray.core.types import T_DuckArray + __all__ = [ "numeric_dtypes", @@ -190,7 +193,8 @@ def variables( *, array_strategy_fn: Union[ArrayStrategyFn, None] = None, dims: Union[ - st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]], None + st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]], + None, ] = None, dtype: st.SearchStrategy[np.dtype] = numeric_dtypes(), attrs: st.SearchStrategy[Mapping] = attrs(), @@ -320,7 +324,7 @@ def variables( valid_shapes = npst.array_shapes(min_dims=len(_dims), max_dims=len(_dims)) _shape = draw(valid_shapes) array_strategy = _array_strategy_fn(shape=_shape, dtype=_dtype) - elif isinstance(_dims, Mapping): + elif isinstance(_dims, (Mapping, dict)): # should be a mapping of form {dim_names: lengths} dim_names, _shape = list(_dims.keys()), tuple(_dims.values()) array_strategy = _array_strategy_fn(shape=_shape, dtype=_dtype) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index c2d002014f5..fbe8d2cde2b 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -115,16 +115,16 @@ def test_given_nothing(self, var): @given(st.data()) def test_given_incorrect_types(self, data): with pytest.raises(TypeError, match="SearchStrategy object"): - data.draw(variables(dims=["x", "y"])) + data.draw(variables(dims=["x", "y"])) # type: ignore[arg-type] with pytest.raises(TypeError, match="SearchStrategy object"): - data.draw(variables(dtype=np.dtype("int32"))) + data.draw(variables(dtype=np.dtype("int32"))) # type: ignore[arg-type] with pytest.raises(TypeError, match="SearchStrategy object"): - data.draw(variables(attrs=dict())) + data.draw(variables(attrs=dict())) # type: ignore[arg-type] with pytest.raises(TypeError, match="Callable"): - data.draw(variables(array_strategy_fn=np.array([0]))) + data.draw(variables(array_strategy_fn=np.array([0]))) # type: ignore[arg-type] @given(st.data()) def test_given_fixed_dims_list(self, data): @@ -143,7 +143,7 @@ 
def test_given_arbitrary_dims_list(self, data): @given(st.data()) def test_given_fixed_sizes(self, data): dims = {"x": 3, "y": 4} - var = data.draw(variables(dims=st.just(dims))) + var = data.draw(variables(dims=st.just(dims))) # type: ignore[arg-type] assert var.dims == ("x", "y") assert var.shape == (3, 4) @@ -163,7 +163,7 @@ def fixed_array_strategy_fn(*, shape=None, dtype=None): var = data.draw( variables( - array_strategy_fn=fixed_array_strategy_fn, dtype=st.just(arr.dtype) + array_strategy_fn=fixed_array_strategy_fn, dtype=st.just(arr.dtype) # type: ignore[arg-type] ) ) @@ -181,7 +181,7 @@ def fixed_array_strategy_fn(*, shape=None, dtype=None): var = data.draw( variables( array_strategy_fn=fixed_array_strategy_fn, - dims=st.just(dims), + dims=st.just(dims), # type: ignore[arg-type] dtype=st.just(arr.dtype), ) ) From 839c4f0da6764d3cb54e99290b9fb4e61ef0263b Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 15:34:46 -0400 Subject: [PATCH 101/155] fix import --- xarray/testing/strategies.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 207b17e75c9..d45dcb47bff 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,15 +1,12 @@ from collections.abc import Hashable, Mapping, Sequence -from typing import TYPE_CHECKING, Any, Protocol, Union +from typing import Any, Protocol, Union import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np import xarray as xr - -if TYPE_CHECKING: - from xarray.core.types import T_DuckArray - +from xarray.core.types import T_DuckArray __all__ = [ "numeric_dtypes", From ded711ac6c614db75df7e5ebe867e76c2f8e8602 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 16:29:24 -0400 Subject: [PATCH 102/155] skip doctests in docstrings --- xarray/testing/strategies.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/xarray/testing/strategies.py b/xarray/testing/strategies.py index d45dcb47bff..3707b18bc47 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -238,10 +238,10 @@ def variables( -------- Generate completely arbitrary Variable objects backed by a numpy array: - >>> variables().example() + >>> variables().example() # doctest: +SKIP array([43506, -16, -151], dtype=int32) - >>> variables().example() + >>> variables().example() # doctest: +SKIP array([[[-10000000., -10000000.], [-10000000., -10000000.]], @@ -259,7 +259,7 @@ def variables( Generate only Variable objects with certain dimension names: - >>> variables(dims=st.just(["a", "b"])).example() + >>> variables(dims=st.just(["a", "b"])).example() # doctest: +SKIP array([[ 248, 4294967295, 4294967295], [2412855555, 3514117556, 4294967295], @@ -269,7 +269,7 @@ def variables( Generate only Variable objects with certain dimension names and lengths: - >>> variables(dims=st.just({"a": 2, "b": 1})).example() + >>> variables(dims=st.just({"a": 2, "b": 1})).example() # doctest: +SKIP array([[-1.00000000e+007+3.40282347e+038j], [-2.75034266e-225+2.22507386e-311j]]) @@ -280,7 +280,7 @@ def variables( >>> import cupy as cp >>> cupy_strategy_fn = make_strategies_namespace(cp).arrays >>> cupy_da = variables(array_strategy_fn=cupy_strategy_fn).example() - >>> cupy_da + >>> cupy_da # doctest: +SKIP array([[ 0., 1., 2.], [ 3., 4., 5.]], dtype=float32) From f3c80ed46c1af10e02ac9de643c298726f4bc6a5 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 17:34:03 -0400 Subject: [PATCH 103/155] fix link to duckarrays page --- doc/user-guide/testing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 9328268c584..b4f52403a54 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -164,7 +164,7 @@ Creating Duck-type Arrays ~~~~~~~~~~~~~~~~~~~~~~~~~ Xarray objects don't have to wrap numpy arrays, in 
fact they can wrap any array type which presents the same API as a -numpy array (so-called "duck array wrapping", see :ref:`wrapping numpy-like arrays `). +numpy array (so-called "duck array wrapping", see :ref:`wrapping numpy-like arrays `). Imagine we want to write a strategy which generates arbitrary ``Variable`` objects, each of which wraps a :py:class:`sparse.COO` array instead of a ``numpy.ndarray``. How could we do that? There are two ways: From 010f28cd8f76821052cab80f8c3f0ae071a491fe Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 17:46:14 -0400 Subject: [PATCH 104/155] don't actually try to run cupy in docs env --- xarray/testing/strategies.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 3707b18bc47..7138629ee86 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -274,12 +274,14 @@ def variables( array([[-1.00000000e+007+3.40282347e+038j], [-2.75034266e-225+2.22507386e-311j]]) - Generate completely arbitrary Variable objects backed by a sparse array: + Generate completely arbitrary Variable objects backed by a cupy GPU array: >>> from hypothesis.extra.array_api import make_strategies_namespace - >>> import cupy as cp - >>> cupy_strategy_fn = make_strategies_namespace(cp).arrays - >>> cupy_da = variables(array_strategy_fn=cupy_strategy_fn).example() + >>> import cupy as cp # doctest: +SKIP + >>> cupy_strategy_fn = make_strategies_namespace(cp).arrays # doctest: +SKIP + >>> cupy_da = variables( + ... array_strategy_fn=cupy_strategy_fn + ... 
).example() # doctest: +SKIP >>> cupy_da # doctest: +SKIP array([[ 0., 1., 2.], From 4b0799200b9f90911feec4ccb8ee3af89f169b34 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 17:57:04 -0400 Subject: [PATCH 105/155] missed a skip --- xarray/testing/strategies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 7138629ee86..47f95dd54c7 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -286,7 +286,7 @@ def variables( array([[ 0., 1., 2.], [ 3., 4., 5.]], dtype=float32) - >>> cupy_da.data.device + >>> cupy_da.data.device # doctest: +SKIP """ From ba99afacddc3a33a0574498a2794beeb23ad22ea Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 18:08:12 -0400 Subject: [PATCH 106/155] okwarning --- doc/user-guide/testing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index b4f52403a54..7697ff50a0b 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -234,7 +234,7 @@ If the array type you want to generate has a top-level namespace (e.g. that whic you can use this neat trick: .. 
ipython:: python - :okexcept: + :okwarning: import numpy.array_api as xp # available in numpy 1.26.0 From 700d652398d341750397bcc1fbd2655bfa3a3ef0 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 18:30:35 -0400 Subject: [PATCH 107/155] just remove the cupy example --- xarray/testing/strategies.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 47f95dd54c7..aaa069c4883 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -273,21 +273,6 @@ def variables( array([[-1.00000000e+007+3.40282347e+038j], [-2.75034266e-225+2.22507386e-311j]]) - - Generate completely arbitrary Variable objects backed by a cupy GPU array: - - >>> from hypothesis.extra.array_api import make_strategies_namespace - >>> import cupy as cp # doctest: +SKIP - >>> cupy_strategy_fn = make_strategies_namespace(cp).arrays # doctest: +SKIP - >>> cupy_da = variables( - ... array_strategy_fn=cupy_strategy_fn - ... 
).example() # doctest: +SKIP - >>> cupy_da # doctest: +SKIP - - array([[ 0., 1., 2.], - [ 3., 4., 5.]], dtype=float32) - >>> cupy_da.data.device # doctest: +SKIP - """ if any( From 0e01d767ed2ccad0278d7a2c35d66d2e9335fa8d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Thu, 2 Nov 2023 18:34:30 -0400 Subject: [PATCH 108/155] ensure shape is always passed to array_strategy_fn --- xarray/testing/strategies.py | 6 ++++-- xarray/tests/test_strategies.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index aaa069c4883..3750c3517ed 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -327,8 +327,10 @@ def variables( ) else: - # nothing provided, so generate everything consistently by drawing dims to match data - array_strategy = _array_strategy_fn(dtype=_dtype) + # nothing provided, so generate everything consistently + # We still generate the shape first here just so that we always pass shape to array_strategy_fn + _shape = draw(npst.array_shapes()) + array_strategy = _array_strategy_fn(shape=_shape, dtype=_dtype) _data = draw(array_strategy) dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index fbe8d2cde2b..81cd46aa7ed 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -163,7 +163,7 @@ def fixed_array_strategy_fn(*, shape=None, dtype=None): var = data.draw( variables( - array_strategy_fn=fixed_array_strategy_fn, dtype=st.just(arr.dtype) # type: ignore[arg-type] + array_strategy_fn=fixed_array_strategy_fn, dims=st.just({"x": 2, "y": 2}), dtype=st.just(arr.dtype) # type: ignore[arg-type] ) ) From 79f40f012370d8977f6f0d463b0a7598753c10de Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 3 Nov 2023 18:52:33 -0400 Subject: [PATCH 109/155] test using make_strategies_namespace --- doc/user-guide/testing.rst | 30 
++++++++---- xarray/testing/strategies.py | 81 ++++++++++++++++++++------------- xarray/tests/__init__.py | 1 + xarray/tests/test_strategies.py | 60 ++++++++++++------------ 4 files changed, 101 insertions(+), 71 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 7697ff50a0b..81255f0fd75 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -45,15 +45,13 @@ These strategies are accessible in the :py:mod:`xarray.testing.strategies` modul .. autosummary:: - testing.strategies.numeric_dtypes - testing.strategies.np_arrays testing.strategies.names testing.strategies.dimension_names testing.strategies.dimension_sizes testing.strategies.attrs testing.strategies.variables -These build upon the numpy strategies offered in :py:mod:`hypothesis.extra.numpy`: +These build upon the numpy and array API strategies offered in :py:mod:`hypothesis.extra.numpy` and :py:mod:`hypothesis.extra.array_api`: .. ipython:: python @@ -159,7 +157,6 @@ length either 3 or 4, and will sometimes also generate a ``z`` dimension of leng By feeding this strategy for dictionaries into the ``dims`` argument of xarray's :py:func:`~st.variables` strategy, we can generate arbitrary :py:class:`~xarray.Variable` objects whose dimensions will always match these specifications. - Creating Duck-type Arrays ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -230,17 +227,32 @@ different type: Either approach is fine, but one may be more convenient than the other depending on the type of the duck array which you want to wrap. -If the array type you want to generate has a top-level namespace (e.g. that which is conventionally imported as ``xp`` or similar), +Compatibility with the Python Array API Standard +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The strategies defined in :py:mod:`testing.strategies` are **not** guaranteed to use array API standard-compliant +dtypes by default. 
+For example arrays with the dtype ``np.dtype('float16')`` may be generated by :py:func:`testing.strategies.variables` +(assuming the ``dtype`` kwarg was not explicitly passed), despite ``np.dtype('float16')`` not being in the +array API standard. + +If the array type you want to generate has an array API-compliant top-level namespace +(e.g. that which is conventionally imported as ``xp`` or similar), you can use this neat trick: .. ipython:: python :okwarning: - import numpy.array_api as xp # available in numpy 1.26.0 + from numpy import array_api as xp # available in numpy 1.26.0 from hypothesis.extra.array_api import make_strategies_namespace - numpy_variables = xrst.variables( - array_strategy_fn=make_strategies_namespace(xp).arrays + xps = make_strategies_namespace(xp) + + xp_variables = xrst.variables( + array_strategy_fn=xps.arrays, + dtype=xps.scalar_dtypes(), ) - numpy_variables.example() + xp_variables.example() + +Another array API-compliant duck array library would replace the import, e.g. ``import cupy as cp`` instead. diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 3750c3517ed..12e59b73399 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -9,7 +9,7 @@ from xarray.core.types import T_DuckArray __all__ = [ - "numeric_dtypes", + "supported_dtypes", "names", "dimension_names", "dimension_sizes", @@ -29,14 +29,19 @@ def __call__( ... -# required to exclude weirder dtypes e.g. unicode, byte_string, array, or nested dtypes. -def numeric_dtypes() -> st.SearchStrategy[np.dtype]: +# +def supported_dtypes() -> st.SearchStrategy[np.dtype]: """ Generates only those numpy dtypes which xarray can handle. Requires the hypothesis package to be installed. - """ + Not just using hypothesis.extra.numpy.scalar_dtypes is required to exclude weirder dtypes + e.g. unicode, byte_string, array, or nested dtypes. + Also required to dodge bugs with pandas non-nanosecond datetime overflows. 
+ """ + # TODO should this be exposed publicly? + # We should at least decide what the set of numpy dtypes that xarray officially supports is. return ( npst.integer_dtypes() | npst.unsigned_integer_dtypes() @@ -45,15 +50,20 @@ def numeric_dtypes() -> st.SearchStrategy[np.dtype]: ) -def np_arrays( - *, +def smallish_arrays( shape: Union[ tuple[int, ...], st.SearchStrategy[tuple[int, ...]] ] = npst.array_shapes(max_side=4), - dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = numeric_dtypes(), + dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = supported_dtypes(), + *, + elements=None, + fill=None, + unique=False, ) -> st.SearchStrategy[np.ndarray]: """ - Generates arbitrary numpy arrays with xarray-compatible dtypes. + Generates arbitrary array API-compliant numpy arrays. + + By default generates arrays with no more than 4 elements per axis for performance, using supported_dtypes. Requires the hypothesis package to be installed. @@ -61,10 +71,15 @@ def np_arrays( ---------- shape dtype - Default is to use any of the numeric_dtypes defined for xarray. + Default is to use any of the scalar dtypes defined in the array API standard. + elements + fill + unique """ - - return npst.arrays(dtype=dtype, shape=shape) + # TODO here we may also wish to generalize/restrict the dtypes produced by xarray's default test strategies + return npst.arrays( + dtype=dtype, shape=shape, elements=elements, fill=fill, unique=unique + ) # TODO Generalize to all valid unicode characters once formatting bugs in xarray's reprs are fixed + docs can handle it. 
@@ -160,11 +175,12 @@ def dimension_sizes( max_size=5, ) _attr_keys = _readable_strings -_small_arrays = np_arrays( +_small_arrays = npst.arrays( shape=npst.array_shapes( max_side=2, max_dims=2, - ) + ), + dtype=npst.scalar_dtypes(), ) _attr_values = st.none() | st.booleans() | _readable_strings | _small_arrays @@ -193,7 +209,7 @@ def variables( st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]], None, ] = None, - dtype: st.SearchStrategy[np.dtype] = numeric_dtypes(), + dtype: st.SearchStrategy[np.dtype] = supported_dtypes(), attrs: st.SearchStrategy[Mapping] = attrs(), ) -> xr.Variable: """ @@ -210,9 +226,8 @@ def variables( Parameters ---------- array_strategy_fn: Callable which returns a strategy generating array-likes, optional - Callable must accept shape and dtype kwargs if passed. - If array_strategy_fn is not passed then the shapes will be derived from the dims kwarg. - Default is to generate a numpy array of arbitrary shape, values and dtype. + Callable must accept shape and dtype kwargs, and must generate results consistent with its input. + If not passed the default is to generate a small numpy array with one of the supported_dtypes. dims: Strategy for generating the dimensions, optional Can either be a strategy for generating a sequence of string dimension names, or a strategy for generating a mapping of string dimension names to integer lengths along each dimension. @@ -220,6 +235,8 @@ def variables( Default is to generate arbitrary dimension names for each axis in data. dtype: Strategy which generates np.dtype objects, optional Will be passed in to array_strategy_fn. + Default is to generate any scalar dtype using supported_dtypes. + Be aware that this default set of dtypes includes some not strictly allowed by the array API standard. attrs: Strategy which generates dicts, optional Default is to generate a nested attributes dictionary containing arbitrary strings, booleans, integers, Nones, and numpy arrays. 
@@ -232,7 +249,8 @@ def variables( Raises ------ TypeError - If custom strategies passed attempt to draw any examples which are not of the correct type. + If a custom array_strategy_fn returns a strategy which generates an example array inconsistent with the shape + & dtype input passed to it. Examples -------- @@ -287,7 +305,7 @@ def variables( _array_strategy_fn: ArrayStrategyFn array_strategy: st.SearchStrategy[T_DuckArray] if array_strategy_fn is None: - _array_strategy_fn = np_arrays # type: ignore[assignment] + _array_strategy_fn = smallish_arrays # type: ignore[assignment] elif not callable(array_strategy_fn): raise TypeError( "array_strategy_fn must be a Callable that accepts the kwargs dtype and shape and returns a hypothesis " @@ -316,24 +334,25 @@ def variables( raise TypeError( f"Invalid type returned by dims strategy - drew an object of type {type(dims)}" ) - - _data = draw(array_strategy) - if _data.shape != _shape: - raise TypeError( - "array_strategy_fn returned an array object with a different shape than it was passed." - f"Passed {_data.shape}, but returned {_shape}." - "Please either specify a consistent shape via the dims kwarg or ensure the array_strategy_fn callable " - "obeys the shape argument passed to it." 
- ) - else: # nothing provided, so generate everything consistently # We still generate the shape first here just so that we always pass shape to array_strategy_fn _shape = draw(npst.array_shapes()) array_strategy = _array_strategy_fn(shape=_shape, dtype=_dtype) - _data = draw(array_strategy) - dim_names = draw(dimension_names(min_dims=_data.ndim, max_dims=_data.ndim)) + dim_names = draw(dimension_names(min_dims=len(_shape), max_dims=len(_shape))) + # TODO move this inside the else loop above so that dim_names can be made consistently with it + # this allows for passing a restrictive array_strategy_function without specifying dims to match + # TODO same problem also for dtype + _data = draw(array_strategy) + + if _data.shape != _shape: + raise TypeError( + "array_strategy_fn returned an array object with a different shape than it was passed." + f"Passed {_shape}, but returned {_data.shape}." + "Please either specify a consistent shape via the dims kwarg or ensure the array_strategy_fn callable " + "obeys the shape argument passed to it." + ) if _data.dtype != _dtype: raise TypeError( "array_strategy_fn returned an array object with a different dtype than it was passed." 
diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 07ba0be6a8c..f6c31164c33 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -95,6 +95,7 @@ def _importorskip( requires_pandas_version_two = pytest.mark.skipif( not has_pandas_version_two, reason="requires pandas 2.0.0" ) +has_numpy_array_api, requires_numpy_array_api = _importorskip("numpy", "1.26.0") # change some global options for tests set_options(warn_for_unclosed_files=True) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 81cd46aa7ed..6487c63b852 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -8,46 +8,21 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st from hypothesis import given +from hypothesis.extra.array_api import make_strategies_namespace from xarray.core.variable import Variable from xarray.testing.strategies import ( attrs, dimension_names, dimension_sizes, - np_arrays, - numeric_dtypes, + supported_dtypes, variables, ) +from xarray.tests import requires_numpy_array_api ALLOWED_ATTRS_VALUES_TYPES = (int, bool, str, np.ndarray) -class TestNumpyArraysStrategy: - @given(np_arrays()) - def test_given_nothing(self, arr): - assert isinstance(arr, np.ndarray) - - @given(np_arrays(dtype=np.dtype("int32"))) - def test_fixed_dtype(self, arr): - assert arr.dtype == np.dtype("int32") - - @given(st.data()) - def test_arbitrary_valid_dtype(self, data): - valid_dtype = data.draw(numeric_dtypes()) - arr = data.draw(np_arrays(dtype=valid_dtype)) - assert arr.dtype == valid_dtype - - @given(np_arrays(shape=(2, 3))) - def test_fixed_shape(self, arr): - assert arr.shape == (2, 3) - - @given(st.data()) - def test_arbitrary_shape(self, data): - shape = data.draw(npst.array_shapes()) - arr = data.draw(np_arrays(shape=shape)) - assert arr.shape == shape - - class TestDimensionNamesStrategy: @given(dimension_names()) def test_types(self, dims): @@ -193,14 +168,37 @@ def 
fixed_array_strategy_fn(*, shape=None, dtype=None): def test_given_fixed_shape_arbitrary_dims_and_arbitrary_data(self, data): dims = dimension_names(min_dims=2, max_dims=2) - def fixed_shape_array_strategy_fn(*, shape=None, dtype=None): - return np_arrays(shape=shape, dtype=dtype) + def array_strategy_fn(*, shape=None, dtype=None): + return npst.arrays(shape=shape, dtype=dtype) var = data.draw( variables( - array_strategy_fn=fixed_shape_array_strategy_fn, + array_strategy_fn=array_strategy_fn, dims=dims, + dtype=supported_dtypes(), ) ) assert var.ndim == 2 + + @requires_numpy_array_api + @given(st.data()) + def test_make_strategies_namespace(self, data): + """ + Test not causing a hypothesis.InvalidArgument by generating a dtype that's not in the array API. + + We still want to generate dtypes not in the array API by default, but this checks we don't accidentally override + the user's choice of dtypes with non-API-compliant ones. + """ + from numpy import ( + array_api as np_array_api, # requires numpy>=1.26.0, and we expect a UserWarning to be raised + ) + + np_array_api_st = make_strategies_namespace(np_array_api) + + data.draw( + variables( + array_strategy_fn=np_array_api_st.arrays, + dtype=np_array_api_st.scalar_dtypes(), + ) + ) From 4ff57ec08e2b5920da19376dff939242cd7180f0 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 3 Nov 2023 19:06:16 -0400 Subject: [PATCH 110/155] test catching array_strategy_fn that returns different dtype --- xarray/testing/strategies.py | 6 +++--- xarray/tests/test_strategies.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 12e59b73399..b6a2c91a0c9 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -248,7 +248,7 @@ def variables( Raises ------ - TypeError + ValueError If a custom array_strategy_fn returns a strategy which generates an example array inconsistent with the shape & dtype input 
passed to it. @@ -347,14 +347,14 @@ def variables( _data = draw(array_strategy) if _data.shape != _shape: - raise TypeError( + raise ValueError( "array_strategy_fn returned an array object with a different shape than it was passed." f"Passed {_shape}, but returned {_data.shape}." "Please either specify a consistent shape via the dims kwarg or ensure the array_strategy_fn callable " "obeys the shape argument passed to it." ) if _data.dtype != _dtype: - raise TypeError( + raise ValueError( "array_strategy_fn returned an array object with a different dtype than it was passed." f"Passed {_dtype}, but returned {_data.dtype}" "Please either specify a consistent dtype via the dtype kwarg or ensure the array_strategy_fn callable " diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 6487c63b852..40e80f37c5f 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -181,6 +181,22 @@ def array_strategy_fn(*, shape=None, dtype=None): assert var.ndim == 2 + @given(st.data()) + def test_catch_unruly_dtype_from_custom_array_strategy_fn(self, data): + def dodgy_array_strategy_fn(*, shape=None, dtype=None): + """Dodgy function which ignores the dtype it was passed""" + return npst.arrays(shape=shape, dtype=npst.floating_dtypes()) + + with pytest.raises( + ValueError, match="returned an array object with a different dtype" + ): + data.draw( + variables( + array_strategy_fn=dodgy_array_strategy_fn, + dtype=st.just(np.dtype("int32")), + ) + ) + @requires_numpy_array_api @given(st.data()) def test_make_strategies_namespace(self, data): From 959222e2c5ac32c7deb6e83ceadd2d359835a1ad Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 3 Nov 2023 19:37:02 -0400 Subject: [PATCH 111/155] test catching array_strategy_fn that returns different shape --- xarray/tests/test_strategies.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 
40e80f37c5f..25c72192bd9 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -197,6 +197,23 @@ def dodgy_array_strategy_fn(*, shape=None, dtype=None): ) ) + @given(st.data()) + def test_catch_unruly_shape_from_custom_array_strategy_fn(self, data): + def dodgy_array_strategy_fn(*, shape=None, dtype=None): + """Dodgy function which ignores the shape it was passed""" + return npst.arrays(shape=(3, 2), dtype=dtype) + + with pytest.raises( + ValueError, match="returned an array object with a different shape" + ): + data.draw( + variables( + array_strategy_fn=dodgy_array_strategy_fn, + dims=st.just({"a": 2, "b": 1}), + dtype=supported_dtypes(), + ) + ) + @requires_numpy_array_api @given(st.data()) def test_make_strategies_namespace(self, data): From 78825c44aecbe6602ae32b9122bbc837f519a0a2 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 3 Nov 2023 19:40:02 -0400 Subject: [PATCH 112/155] generalise test of attrs strategy --- xarray/tests/test_strategies.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 25c72192bd9..acfcdd13070 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -61,13 +61,14 @@ def test_restrict_names(self, data): assert dim.upper() == dim -def check_dict_values(dictionary: dict) -> bool: +def check_dict_values(dictionary: dict, allowed_attrs_values_types: set) -> bool: + """Helper function to assert that all values in recursive dict match one of a set of types.""" for key, value in dictionary.items(): - if isinstance(value, ALLOWED_ATTRS_VALUES_TYPES) or value is None: + if isinstance(value, allowed_attrs_values_types) or value is None: continue elif isinstance(value, dict): # If the value is a dictionary, recursively check it - if not check_dict_values(value): + if not check_dict_values(value, allowed_attrs_values_types): return False else: # If the value is not an integer or a 
dictionary, it's not valid @@ -79,7 +80,7 @@ class TestAttrsStrategy: @given(attrs()) def test_type(self, attrs): assert isinstance(attrs, dict) - check_dict_values(attrs) + check_dict_values(attrs, ALLOWED_ATTRS_VALUES_TYPES) class TestVariablesStrategy: From 2418a618643c946d4a609b4a2df739ff4ce695e8 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 3 Nov 2023 19:43:58 -0400 Subject: [PATCH 113/155] remove misguided comments --- xarray/testing/strategies.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index b6a2c91a0c9..58918135352 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -41,7 +41,7 @@ def supported_dtypes() -> st.SearchStrategy[np.dtype]: Also required to dodge bugs with pandas non-nanosecond datetime overflows. """ # TODO should this be exposed publicly? - # We should at least decide what the set of numpy dtypes that xarray officially supports is. + # We should decide what the set of numpy dtypes that xarray officially supports is. 
return ( npst.integer_dtypes() | npst.unsigned_integer_dtypes() @@ -341,9 +341,6 @@ def variables( array_strategy = _array_strategy_fn(shape=_shape, dtype=_dtype) dim_names = draw(dimension_names(min_dims=len(_shape), max_dims=len(_shape))) - # TODO move this inside the else loop above so that dim_names can be made consistently with it - # this allows for passing a restrictive array_strategy_function without specifying dims to match - # TODO same problem also for dtype _data = draw(array_strategy) if _data.shape != _shape: From adca1d29032443aa5813e8a33a17b5b21c1c3c1c Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 3 Nov 2023 20:48:48 -0400 Subject: [PATCH 114/155] save working version of test_mean --- xarray/testing/strategies.py | 45 +++++++++++++++++++++++++++++++++ xarray/tests/test_strategies.py | 23 +++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 58918135352..8fb28ca89de 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -359,3 +359,48 @@ def variables( ) return xr.Variable(dims=dim_names, data=_data, attrs=draw(attrs)) + + +@st.composite +def unique_subset_of( + draw: st.DrawFn, + d: Mapping[Hashable, Any], + *, + min_size: int = 0, + max_size: Union[int, None] = None, +) -> Mapping[Hashable, Any]: + """ + Return a strategy which generates a unique subset of the given mapping. + + Each entry in the output subset will have a unique key. + + Requires the hypothesis package to be installed. + + Parameters + ---------- + d: Mapping[Hashable, Any] + Mapping from which to sample to produce the subset. + min_size: int, optional + Minimum size of the returned subset. Default is 0. + max_size: int, optional + Maximum size of the returned subset. Default is the full length of the input. + If set to 0 the result will be an empty mapping. 
+ + Returns + ------- + unique_subset_strategy + + Examples + -------- + + """ + # TODO generalize this to work for any iterable? Could then be used on dimension_names as well as dimension_sizes. + subset_keys = draw( + st.lists( + st.sampled_from(list(d.keys())), + unique=True, + min_size=min_size, + max_size=max_size, + ) + ) + return {k: d[k] for k in subset_keys} diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index acfcdd13070..b3dd9a1140c 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -16,6 +16,7 @@ dimension_names, dimension_sizes, supported_dtypes, + unique_subset_of, variables, ) from xarray.tests import requires_numpy_array_api @@ -236,3 +237,25 @@ def test_make_strategies_namespace(self, data): dtype=np_array_api_st.scalar_dtypes(), ) ) + + +@given(st.data()) +def test_mean(data): + """Test that the mean of any xarray Variable is always equal to the mean of the underlying numpy array.""" + + # create arbitrary data + array_dims = data.draw(dimension_sizes(min_dims=1)) + # print(array_dims) + var = data.draw(variables(dims=st.just(array_dims))) + + # specify arbitrary reduction + reduction_dims = list(data.draw(unique_subset_of(array_dims, min_size=1)).keys()) + # print(reduction_dims) + + # create expected result (using nanmean because arrays with Nans will be generated) + reduction_axes = tuple(var.get_axis_num(dim) for dim in reduction_dims) + expected = np.nanmean(var.data, axis=reduction_axes) + + # assert property is always satisfied + result = var.mean(dim=reduction_dims).data + npt.assert_equal(expected, result) From 14998c18e07ed4499b35ec41fc94ddb5d0052626 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 3 Nov 2023 21:10:56 -0400 Subject: [PATCH 115/155] expose unique_subset_of --- doc/api.rst | 1 + doc/user-guide/testing.rst | 47 +++++++++++++++++++++++++++++++-- xarray/testing/strategies.py | 14 ++++++++-- xarray/tests/test_strategies.py | 22 +++++++++++++++ 4 files 
changed, 80 insertions(+), 4 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index ca783fb0409..9b3392a8582 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1087,6 +1087,7 @@ Hypothesis Testing Strategies testing.strategies.dimension_sizes testing.strategies.attrs testing.strategies.variables + testing.strategies.unique_subset_of Exceptions ========== diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 81255f0fd75..157de491a7b 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -50,6 +50,7 @@ These strategies are accessible in the :py:mod:`xarray.testing.strategies` modul testing.strategies.dimension_sizes testing.strategies.attrs testing.strategies.variables + testing.strategies.unique_subset_of These build upon the numpy and array API strategies offered in :py:mod:`hypothesis.extra.numpy` and :py:mod:`hypothesis.extra.array_api`: @@ -157,8 +158,8 @@ length either 3 or 4, and will sometimes also generate a ``z`` dimension of leng By feeding this strategy for dictionaries into the ``dims`` argument of xarray's :py:func:`~st.variables` strategy, we can generate arbitrary :py:class:`~xarray.Variable` objects whose dimensions will always match these specifications. -Creating Duck-type Arrays -~~~~~~~~~~~~~~~~~~~~~~~~~ +Generating Duck-type Arrays +~~~~~~~~~~~~~~~~~~~~~~~~~~~ Xarray objects don't have to wrap numpy arrays, in fact they can wrap any array type which presents the same API as a numpy array (so-called "duck array wrapping", see :ref:`wrapping numpy-like arrays `). @@ -256,3 +257,45 @@ you can use this neat trick: xp_variables.example() Another array API-compliant duck array library would replace the import, e.g. ``import cupy as cp`` instead. + +Testing over Subsets of Dimensions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A common task when testing xarray user code is checking that your function works for all valid input dimensions. 
+We can chain strategies to achieve this, for which the helper strategy :py:func:`~testing.strategies.unique_subset_of` +is useful. + +.. ipython:: python + + dim_sizes = {"x": 2, "y": 3, "z": 4} + unique_subset_of(dim_sizes).example() + unique_subset_of(dim_sizes).example() + +This is useful because operations like reductions can be performed over any subset of the xarray object's dimensions. +For example we can write a pytest test that tests that a reduction gives the expected result when applying that reduction +along any possible valid subset of the Variable's dimensions. + +.. code-block:: python + + from hypothesis import given + import numpy.testing as npt + + + @given(st.data()) + def test_mean(data): + """Test that the mean of an xarray Variable is always equal to the mean of the underlying array.""" + + # create arbitrary data + array_dims = data.draw(dimension_names()) + var = data.draw(variables(dims=array_dims)) + + # specify arbitrary reduction + reduction_dims = data.draw(xrst.unique_subset_of(array_dims)) + + # create expected result + reduction_axes = [var.get_axis_num(dim) for dim in reduction_dims] + expected = var.data.mean(axis=reduction_axes) + + # assert property is always satisfied + result = var.mean(dims=reduction_dims).data + npt.assert_equal(expected, result) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 8fb28ca89de..b517851436d 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -15,6 +15,7 @@ "dimension_sizes", "attrs", "variables", + "unique_subset_of", ] @@ -364,7 +365,7 @@ def variables( @st.composite def unique_subset_of( draw: st.DrawFn, - d: Mapping[Hashable, Any], + d: dict[Hashable, Any], *, min_size: int = 0, max_size: Union[int, None] = None, @@ -392,8 +393,17 @@ def unique_subset_of( Examples -------- - + >>> unique_subset_of({"x": 2, "y": 3}).example() # doctest: +SKIP + {'y': 3} """ + if not isinstance(d, dict): + raise TypeError( + f"Object to sample from must 
be a dict, but received type {type(d)}" + ) + + if len(d) == 0: + raise ValueError("Can't sample from a length-zero sequence.") + # TODO generalize this to work for any iterable? Could then be used on dimension_names as well as dimension_sizes. subset_keys = draw( st.lists( diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index b3dd9a1140c..60aa6d7fee0 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -239,6 +239,28 @@ def test_make_strategies_namespace(self, data): ) +class TestUniqueSubsetOf: + @given(st.data()) + def test_invalid(self, data): + with pytest.raises(TypeError, match="must be a dict"): + data.draw(unique_subset_of(0)) + + with pytest.raises(ValueError, match="length-zero sequence"): + data.draw(unique_subset_of({})) + + @given(st.data()) + def test_mapping(self, data): + dim_sizes = data.draw(dimension_sizes(min_dims=1)) + subset_of_dim_sizes = data.draw(unique_subset_of(dim_sizes)) + + for dim, length in subset_of_dim_sizes.items(): + assert dim in dim_sizes + assert dim_sizes[dim] == length + + def test_iterable(self): + ... 
+ + @given(st.data()) def test_mean(data): """Test that the mean of any xarray Variable is always equal to the mean of the underlying numpy array.""" From 71f01f95f7a6474b71e47cc80eade16853f52291 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 3 Nov 2023 21:31:07 -0400 Subject: [PATCH 116/155] generalize unique_subset_of to handle iterables --- xarray/testing/strategies.py | 36 +++++++++++++++++++++------------ xarray/tests/test_strategies.py | 11 +++++++--- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index b517851436d..c985ec706b0 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,4 +1,4 @@ -from collections.abc import Hashable, Mapping, Sequence +from collections.abc import Hashable, Iterable, Mapping, Sequence from typing import Any, Protocol, Union import hypothesis.extra.numpy as npst @@ -362,25 +362,26 @@ def variables( return xr.Variable(dims=dim_names, data=_data, attrs=draw(attrs)) +# TODO use overloads for typing @st.composite def unique_subset_of( draw: st.DrawFn, - d: dict[Hashable, Any], + objs: Union[Iterable[Hashable], Mapping[Hashable, Any]], *, min_size: int = 0, max_size: Union[int, None] = None, -) -> Mapping[Hashable, Any]: +) -> Union[Iterable[Hashable], Mapping[Hashable, Any]]: """ - Return a strategy which generates a unique subset of the given mapping. + Return a strategy which generates a unique subset of the given objs. - Each entry in the output subset will have a unique key. + Each entry in the output subset will be unique (if input was an iterable) or have a unique key (if it was a mapping). Requires the hypothesis package to be installed. Parameters ---------- - d: Mapping[Hashable, Any] - Mapping from which to sample to produce the subset. + objs: Union[Iterable[Hashable], Mapping[Hashable, Any]] + Objects from which to sample to produce the subset. min_size: int, optional Minimum size of the returned subset. 
Default is 0. max_size: int, optional @@ -395,22 +396,31 @@ def unique_subset_of( -------- >>> unique_subset_of({"x": 2, "y": 3}).example() # doctest: +SKIP {'y': 3} + >>> unique_subset_of(["x", "y"]).example() # doctest: +SKIP + ['x'] """ - if not isinstance(d, dict): + if not isinstance(objs, Iterable): raise TypeError( - f"Object to sample from must be a dict, but received type {type(d)}" + f"Object to sample from must be an Iterable or a Mapping, but received type {type(objs)}" ) - if len(d) == 0: + if len(objs) == 0: raise ValueError("Can't sample from a length-zero sequence.") - # TODO generalize this to work for any iterable? Could then be used on dimension_names as well as dimension_sizes. + keys = list(objs.keys()) if isinstance(objs, Mapping) else objs + subset_keys = draw( st.lists( - st.sampled_from(list(d.keys())), + st.sampled_from(keys), unique=True, min_size=min_size, max_size=max_size, ) ) - return {k: d[k] for k in subset_keys} + + if isinstance(objs, dict): + subset_objs = {k: objs[k] for k in subset_keys} + else: + subset_objs = tuple(subset_keys) + + return subset_objs diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 60aa6d7fee0..017fad16b7e 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -242,7 +242,7 @@ def test_make_strategies_namespace(self, data): class TestUniqueSubsetOf: @given(st.data()) def test_invalid(self, data): - with pytest.raises(TypeError, match="must be a dict"): + with pytest.raises(TypeError, match="must be an Iterable or a Mapping"): data.draw(unique_subset_of(0)) with pytest.raises(ValueError, match="length-zero sequence"): @@ -257,8 +257,13 @@ def test_mapping(self, data): assert dim in dim_sizes assert dim_sizes[dim] == length - def test_iterable(self): - ... 
+ @given(st.data()) + def test_iterable(self, data): + dim_names = data.draw(dimension_names(min_dims=1)) + subset_of_dim_names = data.draw(unique_subset_of(dim_names)) + + for dim in subset_of_dim_names: + assert dim in dim_names @given(st.data()) From 9c1089594ea7386fc82969334c13f304ad2db2e0 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 3 Nov 2023 21:39:38 -0400 Subject: [PATCH 117/155] type hint unique_subset_of using overloads --- xarray/testing/strategies.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index c985ec706b0..6aa3438e100 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,5 +1,5 @@ from collections.abc import Hashable, Iterable, Mapping, Sequence -from typing import Any, Protocol, Union +from typing import Any, Protocol, Union, overload import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -362,7 +362,26 @@ def variables( return xr.Variable(dims=dim_names, data=_data, attrs=draw(attrs)) -# TODO use overloads for typing +@overload +def unique_subset_of( + objs: Iterable[Hashable], + *, + min_size: int = 0, + max_size: Union[int, None] = None, +) -> st.SearchStrategy[Iterable[Hashable]]: + ... + + +@overload +def unique_subset_of( + objs: Mapping[Hashable, Any], + *, + min_size: int = 0, + max_size: Union[int, None] = None, +) -> st.SearchStrategy[Mapping[Hashable, Any]]: + ... 
+ + @st.composite def unique_subset_of( draw: st.DrawFn, From 2833f01b7bab0aec9363306e99a013af985ec19d Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 3 Nov 2023 21:41:38 -0400 Subject: [PATCH 118/155] use iterables in test_mean example --- xarray/tests/test_strategies.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 017fad16b7e..03687898c89 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -271,13 +271,11 @@ def test_mean(data): """Test that the mean of any xarray Variable is always equal to the mean of the underlying numpy array.""" # create arbitrary data - array_dims = data.draw(dimension_sizes(min_dims=1)) - # print(array_dims) + array_dims = data.draw(dimension_names(min_dims=1)) var = data.draw(variables(dims=st.just(array_dims))) # specify arbitrary reduction - reduction_dims = list(data.draw(unique_subset_of(array_dims, min_size=1)).keys()) - # print(reduction_dims) + reduction_dims = data.draw(unique_subset_of(array_dims, min_size=1)) # create expected result (using nanmean because arrays with Nans will be generated) reduction_axes = tuple(var.get_axis_num(dim) for dim in reduction_dims) From 1ddc515a1f6232f5267b32a0145f51241f1488b7 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 3 Nov 2023 21:44:40 -0400 Subject: [PATCH 119/155] test_mean example in docs now uses iterable of dimension_names --- doc/user-guide/testing.rst | 14 +++++++------- xarray/tests/test_strategies.py | 20 -------------------- 2 files changed, 7 insertions(+), 27 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 157de491a7b..81208906849 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -286,16 +286,16 @@ along any possible valid subset of the Variable's dimensions. 
"""Test that the mean of an xarray Variable is always equal to the mean of the underlying array.""" # create arbitrary data - array_dims = data.draw(dimension_names()) - var = data.draw(variables(dims=array_dims)) + array_dims = data.draw(dimension_names(min_dims=1)) + var = data.draw(variables(dims=st.just(array_dims))) # specify arbitrary reduction - reduction_dims = data.draw(xrst.unique_subset_of(array_dims)) + reduction_dims = data.draw(xrst.unique_subset_of(array_dims, min_size=1)) - # create expected result - reduction_axes = [var.get_axis_num(dim) for dim in reduction_dims] - expected = var.data.mean(axis=reduction_axes) + # create expected result (using nanmean because arrays with Nans will be generated) + reduction_axes = tuple(var.get_axis_num(dim) for dim in reduction_dims) + expected = np.nanmean(var.data, axis=reduction_axes) # assert property is always satisfied - result = var.mean(dims=reduction_dims).data + result = var.mean(dim=reduction_dims).data npt.assert_equal(expected, result) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 03687898c89..6b0a3d179f0 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -264,23 +264,3 @@ def test_iterable(self, data): for dim in subset_of_dim_names: assert dim in dim_names - - -@given(st.data()) -def test_mean(data): - """Test that the mean of any xarray Variable is always equal to the mean of the underlying numpy array.""" - - # create arbitrary data - array_dims = data.draw(dimension_names(min_dims=1)) - var = data.draw(variables(dims=st.just(array_dims))) - - # specify arbitrary reduction - reduction_dims = data.draw(unique_subset_of(array_dims, min_size=1)) - - # create expected result (using nanmean because arrays with Nans will be generated) - reduction_axes = tuple(var.get_axis_num(dim) for dim in reduction_dims) - expected = np.nanmean(var.data, axis=reduction_axes) - - # assert property is always satisfied - result = 
var.mean(dim=reduction_dims).data - npt.assert_equal(expected, result) From 618bfea40ad12e89b017808eb2bb355ab68e5a76 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 3 Nov 2023 21:47:57 -0400 Subject: [PATCH 120/155] fix some warnings in docs build --- doc/api.rst | 2 +- xarray/testing/strategies.py | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 9b3392a8582..c2f63af094f 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1081,7 +1081,7 @@ Hypothesis Testing Strategies .. autosummary:: :toctree: generated/ - testing.strategies.numeric_dtypes + testing.strategies.supported_dtypes testing.strategies.names testing.strategies.dimension_names testing.strategies.dimension_sizes diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 6aa3438e100..9541c669102 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -35,14 +35,16 @@ def supported_dtypes() -> st.SearchStrategy[np.dtype]: """ Generates only those numpy dtypes which xarray can handle. - Requires the hypothesis package to be installed. - - Not just using hypothesis.extra.numpy.scalar_dtypes is required to exclude weirder dtypes + Avoiding using hypothesis.extra.numpy.scalar_dtypes is required to exclude weirder dtypes e.g. unicode, byte_string, array, or nested dtypes. Also required to dodge bugs with pandas non-nanosecond datetime overflows. + + Does not generate all dtypes that xarray can handle - just only generates dtypes which it definitely can. + + Requires the hypothesis package to be installed. """ # TODO should this be exposed publicly? - # We should decide what the set of numpy dtypes that xarray officially supports is. + # We should at least decide what the set of numpy dtypes that xarray officially supports is. 
return ( npst.integer_dtypes() | npst.unsigned_integer_dtypes() From fe1ff1a9f0d4cc07bbb1411860b3d7e47f1a9bd9 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 3 Nov 2023 21:51:54 -0400 Subject: [PATCH 121/155] example of passing list to unique_subset_of --- doc/user-guide/testing.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 81208906849..207223e7c53 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -265,6 +265,16 @@ A common task when testing xarray user code is checking that your function works We can chain strategies to achieve this, for which the helper strategy :py:func:`~testing.strategies.unique_subset_of` is useful. +It works for lists of dimension names + +.. ipython:: python + + dim_sizes = ["x", "y", "z"] + unique_subset_of(dim_sizes).example() + unique_subset_of(dim_sizes).example() + +as well as for mappings of dimension names to sizes + .. ipython:: python dim_sizes = {"x": 2, "y": 3, "z": 4} From 2e038eaeaa920e96d86acd1940cb1afe22f482fd Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 3 Nov 2023 22:27:04 -0400 Subject: [PATCH 122/155] fix import in docs page --- doc/user-guide/testing.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 207223e7c53..e261b1d3004 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -270,16 +270,16 @@ It works for lists of dimension names .. ipython:: python dim_sizes = ["x", "y", "z"] - unique_subset_of(dim_sizes).example() - unique_subset_of(dim_sizes).example() + xrst.unique_subset_of(dim_sizes).example() + xrst.unique_subset_of(dim_sizes).example() as well as for mappings of dimension names to sizes .. 
ipython:: python dim_sizes = {"x": 2, "y": 3, "z": 4} - unique_subset_of(dim_sizes).example() - unique_subset_of(dim_sizes).example() + xrst.unique_subset_of(dim_sizes).example() + xrst.unique_subset_of(dim_sizes).example() This is useful because operations like reductions can be performed over any subset of the xarray object's dimensions. For example we can write a pytest test that tests that a reduction gives the expected result when applying that reduction From 04c3dc17a7a54130b9b11b2a52dc6b50bca4d5be Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Sat, 4 Nov 2023 00:01:15 -0400 Subject: [PATCH 123/155] try to satisfy sphinx --- xarray/testing/strategies.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 9541c669102..f67121d5d7a 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -30,16 +30,13 @@ def __call__( ... -# def supported_dtypes() -> st.SearchStrategy[np.dtype]: """ Generates only those numpy dtypes which xarray can handle. - Avoiding using hypothesis.extra.numpy.scalar_dtypes is required to exclude weirder dtypes - e.g. unicode, byte_string, array, or nested dtypes. + Avoiding using hypothesis.extra.numpy.scalar_dtypes is required to exclude weirder dtypes e.g. unicode, byte_string, array, or nested dtypes. Also required to dodge bugs with pandas non-nanosecond datetime overflows. - - Does not generate all dtypes that xarray can handle - just only generates dtypes which it definitely can. + Note does not generate all dtypes that xarray can handle - just only generates dtypes which it definitely can. Requires the hypothesis package to be installed. """ @@ -412,6 +409,7 @@ def unique_subset_of( Returns ------- unique_subset_strategy + Strategy generating subset of the input. 
Examples -------- @@ -439,9 +437,6 @@ def unique_subset_of( ) ) - if isinstance(objs, dict): - subset_objs = {k: objs[k] for k in subset_keys} - else: - subset_objs = tuple(subset_keys) - - return subset_objs + return ( + {k: objs[k] for k in subset_keys} if isinstance(objs, Mapping) else subset_keys + ) From cf35fb9d258da4b51cacd43628d1b94c08e7861b Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Sat, 4 Nov 2023 04:39:47 -0400 Subject: [PATCH 124/155] Minor corrections to docs --- doc/user-guide/testing.rst | 9 ++++----- doc/whats-new.rst | 10 +--------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index e261b1d3004..19a90eb32c2 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -269,9 +269,9 @@ It works for lists of dimension names .. ipython:: python - dim_sizes = ["x", "y", "z"] - xrst.unique_subset_of(dim_sizes).example() - xrst.unique_subset_of(dim_sizes).example() + dims = ["x", "y", "z"] + xrst.unique_subset_of(dims).example() + xrst.unique_subset_of(dims).example() as well as for mappings of dimension names to sizes @@ -287,7 +287,6 @@ along any possible valid subset of the Variable's dimensions. .. code-block:: python - from hypothesis import given import numpy.testing as npt @@ -299,7 +298,7 @@ along any possible valid subset of the Variable's dimensions. 
array_dims = data.draw(dimension_names(min_dims=1)) var = data.draw(variables(dims=st.just(array_dims))) - # specify arbitrary reduction + # specify arbitrary reduction along at least one dimension reduction_dims = data.draw(xrst.unique_subset_of(array_dims, min_size=1)) # create expected result (using nanmean because arrays with Nans will be generated) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e16d1f54fae..e425800064e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -24,16 +24,8 @@ New Features - Added hypothesis strategies for generating :py:class:`xarray.Variable` objects containing arbitrary data, useful for parametrizing downstream tests. Accessible under :py:func:`testing.strategies`, and documented in a new page on testing in the User Guide. - (:issue:`6911`, :pull:`6908`) + (:issue:`6911`, :pull:`8404`) By `Tom Nicholas `_. -- Enable taking the mean of dask-backed :py:class:`cftime.datetime` arrays - (:pull:`6556`, :pull:`6940`). By `Deepak Cherian - `_ and `Spencer Clark - `_. -- Allow creating Xarray objects where a multidimensional variable shares its name - with a dimension. Examples include output from finite volume models like FVCOM. - (:issue:`2233`, :pull:`7989`) - By `Deepak Cherian `_ and `Benoit Bovy `_. - Use `opt_einsum `_ for :py:func:`xarray.dot` by default if installed. By `Deepak Cherian `_. (:issue:`7764`, :pull:`8373`). From 4811e8ac12cd5286fd5b5943fa598327f3ce45d8 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Sun, 5 Nov 2023 06:20:43 -0500 Subject: [PATCH 125/155] Add supported_dtypes to list of public strategies in docs --- doc/user-guide/testing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 19a90eb32c2..14fc64317bd 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -44,7 +44,7 @@ These strategies are accessible in the :py:mod:`xarray.testing.strategies` modul .. currentmodule:: xarray .. 
autosummary:: - + testing.supported_dtypes testing.strategies.names testing.strategies.dimension_names testing.strategies.dimension_sizes From a03625353a91992d69d0aeda0659ef494dcfd37e Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Sun, 5 Nov 2023 06:22:32 -0500 Subject: [PATCH 126/155] Generate number of dimensions in test_given_arbitrary_dims_list Co-authored-by: Zac Hatfield-Dodds --- xarray/tests/test_strategies.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 6b0a3d179f0..67e13638284 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -110,12 +110,12 @@ def test_given_fixed_dims_list(self, data): assert list(var.dims) == dims - @given(st.data()) - def test_given_arbitrary_dims_list(self, data): - dims = dimension_names(min_dims=1, max_dims=1) + @given(st.data(), st.integers(0, 10)) + def test_given_arbitrary_dims_list(self, data, n): + dims = dimension_names(min_dims=n, max_dims=n) var = data.draw(variables(dims=dims)) - assert len(list(var.dims)) == 1 + assert len(list(var.dims)) == n @given(st.data()) def test_given_fixed_sizes(self, data): From 054a0dcf5482fa4199ce0c9a8646ea54d8580d79 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Sun, 5 Nov 2023 06:23:32 -0500 Subject: [PATCH 127/155] Update minimum version of hypothesis Co-authored-by: Zac Hatfield-Dodds --- ci/requirements/doc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index c4f590fdb18..d7737a8403e 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -9,7 +9,7 @@ dependencies: - cartopy - cfgrib - dask-core>=2022.1 - - hypothesis + - hypothesis>=6.75.8 - h5netcdf>=0.13 - ipykernel - ipywidgets # silence nbsphinx warning From ececa0738726eea6f0ccd5b15c04fedfa2afe7e7 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Fri, 10 Nov 2023 15:38:44 -0700 Subject: [PATCH 128/155] fix 
incorrect indentation in autosummary --- doc/user-guide/testing.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 14fc64317bd..7f028cbb46f 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -44,7 +44,8 @@ These strategies are accessible in the :py:mod:`xarray.testing.strategies` modul .. currentmodule:: xarray .. autosummary:: - testing.supported_dtypes + + testing.strategies.supported_dtypes testing.strategies.names testing.strategies.dimension_names testing.strategies.dimension_sizes From 0fa090d5d8bbb4bab93ed97bf60a56d3c52c8057 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 11 Nov 2023 10:59:35 -0700 Subject: [PATCH 129/155] link to docs page on testing --- doc/api.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index c2f63af094f..856ebf10720 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1075,6 +1075,8 @@ Hypothesis Testing Strategies .. currentmodule:: xarray +See the :ref:`documentation page on testing ` for a guide on how to use these strategies. + .. warning:: These strategies should be considered highly experimental, and liable to change at any time. From a9ac6f1a0b9f6b3f1b67f83f11505bffab059c8d Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 11 Nov 2023 11:00:11 -0700 Subject: [PATCH 130/155] use warning imperative for array API non-compliant dtypes --- doc/user-guide/testing.rst | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 7f028cbb46f..419364cdaaf 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -12,7 +12,7 @@ Testing your code np.random.seed(123456) -.. _hypothesis: +.. _testing.hypothesis: Hypothesis testing ------------------ @@ -232,11 +232,16 @@ want to wrap. 
Compatibility with the Python Array API Standard ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The strategies defined in :py:mod:`testing.strategies` are **not** guaranteed to use array API standard-compliant -dtypes by default. -For example arrays with the dtype ``np.dtype('float16')`` may be generated by :py:func:`testing.strategies.variables` -(assuming the ``dtype`` kwarg was not explicitly passed), despite ``np.dtype('float16')`` not being in the -array API standard. +Xarray aims to be compatible with any duck-array type that conforms to the `Python Array API Standard `_ +(see our :ref:`docs on Array API Standard support `). + +.. warning:: + + The strategies defined in :py:mod:`testing.strategies` are **not** guaranteed to use array API standard-compliant + dtypes by default. + For example arrays with the dtype ``np.dtype('float16')`` may be generated by :py:func:`testing.strategies.variables` + (assuming the ``dtype`` kwarg was not explicitly passed), despite ``np.dtype('float16')`` not being in the + array API standard. If the array type you want to generate has an array API-compliant top-level namespace (e.g. that which is conventionally imported as ``xp`` or similar), From 43831ce91020a22de75601c8e4f8a49689960162 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 11 Nov 2023 11:03:42 -0700 Subject: [PATCH 131/155] fix bugs in sparse examples --- doc/user-guide/testing.rst | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 419364cdaaf..40bf34c8254 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -172,22 +172,19 @@ Imagine we want to write a strategy which generates arbitrary ``Variable`` objec different type: .. ipython:: python - :okexcept: import sparse .. 
ipython:: python - :okexcept: def convert_to_sparse(var): if var.ndim == 0: return var else: - var.data = sparse.COO.from_numpy(da.values) + var.data = sparse.COO.from_numpy(var.to_numpy()) return var .. ipython:: python - :okexcept: sparse_variables = xrst.variables().map(convert_to_sparse) @@ -197,7 +194,6 @@ different type: 2. Pass a function which returns a strategy which generates the duck-typed arrays directly to the ``array_strategy_fn`` argument of the xarray strategies: .. ipython:: python - :okexcept: @st.composite def sparse_random_arrays( @@ -215,11 +211,10 @@ different type: def sparse_random_arrays_fn( *, shape: tuple[int] = None, dtype: np.dtype = None ) -> st.SearchStrategy[sparse._coo.core.COO]: - return sparse_arrays(shape=shape) + return sparse_random_arrays(shape=shape) .. ipython:: python - :okexcept: sparse_random_variables = xrst.variables( array_strategy_fn=sparse_random_arrays_fn, dtype=st.just(np.dtype("float64")) From 62dbe887aeb68348e570eed89ad07ccbd9eac2d3 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 11 Nov 2023 11:05:05 -0700 Subject: [PATCH 132/155] add tag for array API standard info --- doc/internals/duck-arrays-integration.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/internals/duck-arrays-integration.rst b/doc/internals/duck-arrays-integration.rst index a674acb04fe..43b17be8bb8 100644 --- a/doc/internals/duck-arrays-integration.rst +++ b/doc/internals/duck-arrays-integration.rst @@ -31,6 +31,8 @@ property needs to obey `numpy's broadcasting rules `_ of these same rules). +.. 
_internals.duckarrays.array_api_standard: + Python Array API standard support ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From af5eb25950281af85e64809d4684920fd9bd88b2 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 11 Nov 2023 11:52:10 -0700 Subject: [PATCH 133/155] move no-dependencies-on-other-values-inputs to given decorator --- doc/user-guide/testing.rst | 2 +- xarray/testing/strategies.py | 2 +- xarray/tests/test_strategies.py | 28 +++++++++++++--------------- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 40bf34c8254..bed161cf742 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -227,7 +227,7 @@ want to wrap. Compatibility with the Python Array API Standard ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Xarray aims to be compatible with any duck-array type that conforms to the `Python Array API Standard `_ +Xarray aims to be compatible with any duck-array type that conforms to the `Python Array API Standard `_ (see our :ref:`docs on Array API Standard support `). .. 
warning:: diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index f67121d5d7a..6b4715db10d 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -53,7 +53,7 @@ def supported_dtypes() -> st.SearchStrategy[np.dtype]: def smallish_arrays( shape: Union[ tuple[int, ...], st.SearchStrategy[tuple[int, ...]] - ] = npst.array_shapes(max_side=4), + ] = npst.array_shapes(min_dims=0, max_dims=3, min_side=0, max_side=4), dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = supported_dtypes(), *, elements=None, diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 67e13638284..0e04d9ea10c 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -110,12 +110,12 @@ def test_given_fixed_dims_list(self, data): assert list(var.dims) == dims - @given(st.data(), st.integers(0, 10)) - def test_given_arbitrary_dims_list(self, data, n): - dims = dimension_names(min_dims=n, max_dims=n) + @given(st.data(), st.integers(0, 3)) + def test_given_arbitrary_dims_list(self, data, ndims): + dims = dimension_names(min_dims=ndims, max_dims=ndims) var = data.draw(variables(dims=dims)) - assert len(list(var.dims)) == n + assert len(list(var.dims)) == ndims @given(st.data()) def test_given_fixed_sizes(self, data): @@ -166,9 +166,9 @@ def fixed_array_strategy_fn(*, shape=None, dtype=None): assert var.sizes == dims npt.assert_equal(var.data, arr) - @given(st.data()) - def test_given_fixed_shape_arbitrary_dims_and_arbitrary_data(self, data): - dims = dimension_names(min_dims=2, max_dims=2) + @given(st.data(), st.integers(min_value=0, max_value=3)) + def test_given_fixed_shape_arbitrary_dims_and_arbitrary_data(self, data, ndims): + dim_names = data.draw(dimension_names(min_dims=ndims, max_dims=ndims)) def array_strategy_fn(*, shape=None, dtype=None): return npst.arrays(shape=shape, dtype=dtype) @@ -176,12 +176,12 @@ def array_strategy_fn(*, shape=None, dtype=None): var = data.draw( variables( 
array_strategy_fn=array_strategy_fn, - dims=dims, + dims=st.just(dim_names), dtype=supported_dtypes(), ) ) - assert var.ndim == 2 + assert var.ndim == ndims @given(st.data()) def test_catch_unruly_dtype_from_custom_array_strategy_fn(self, data): @@ -248,18 +248,16 @@ def test_invalid(self, data): with pytest.raises(ValueError, match="length-zero sequence"): data.draw(unique_subset_of({})) - @given(st.data()) - def test_mapping(self, data): - dim_sizes = data.draw(dimension_sizes(min_dims=1)) + @given(st.data(), dimension_sizes(min_dims=1)) + def test_mapping(self, data, dim_sizes): subset_of_dim_sizes = data.draw(unique_subset_of(dim_sizes)) for dim, length in subset_of_dim_sizes.items(): assert dim in dim_sizes assert dim_sizes[dim] == length - @given(st.data()) - def test_iterable(self, data): - dim_names = data.draw(dimension_names(min_dims=1)) + @given(st.data(), dimension_names(min_dims=1)) + def test_iterable(self, data, dim_names): subset_of_dim_names = data.draw(unique_subset_of(dim_names)) for dim in subset_of_dim_names: From dc7825416bdc3304fe92a7f4a9b47724800e60c9 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 11 Nov 2023 12:27:33 -0700 Subject: [PATCH 134/155] generate everything that can be generated --- xarray/tests/test_strategies.py | 87 +++++++++++++-------------------- 1 file changed, 34 insertions(+), 53 deletions(-) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 0e04d9ea10c..9c110a45742 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -35,10 +35,11 @@ def test_types(self, dims): def test_unique(self, dims): assert len(set(dims)) == len(dims) - @given(dimension_names(min_dims=3, max_dims=3)) - def test_fixed_number_of_dims(self, dims): - assert isinstance(dims, list) - assert len(dims) == 3 + @given(st.data(), st.integers(min_value=0, max_value=3)) + def test_fixed_number_of_dims(self, data, ndims): + dim_names = data.draw(dimension_names(min_dims=ndims, 
max_dims=ndims)) + assert isinstance(dim_names, list) + assert len(dim_names) == ndims class TestDimensionSizesStrategy: @@ -47,12 +48,16 @@ def test_types(self, dims): assert isinstance(dims, dict) for d, n in dims.items(): assert isinstance(d, str) + assert len(d) >= 1 + assert isinstance(n, int) + assert n >= 0 - @given(dimension_sizes(min_dims=3, max_dims=3)) - def test_fixed_number_of_dims(self, dims): - assert isinstance(dims, dict) - assert len(dims) == 3 + @given(st.data(), st.integers(min_value=0, max_value=3)) + def test_fixed_number_of_dims(self, data, ndims): + dim_sizes = data.draw(dimension_sizes(min_dims=ndims, max_dims=ndims)) + assert isinstance(dim_sizes, dict) + assert len(dim_sizes) == ndims @given(st.data()) def test_restrict_names(self, data): @@ -103,71 +108,47 @@ def test_given_incorrect_types(self, data): with pytest.raises(TypeError, match="Callable"): data.draw(variables(array_strategy_fn=np.array([0]))) # type: ignore[arg-type] - @given(st.data()) - def test_given_fixed_dims_list(self, data): - dims = ["x", "y"] - var = data.draw(variables(dims=st.just(dims))) + @given(st.data(), dimension_names()) + def test_given_fixed_dim_names(self, data, fixed_dim_names): + var = data.draw(variables(dims=st.just(fixed_dim_names))) - assert list(var.dims) == dims + assert list(var.dims) == fixed_dim_names - @given(st.data(), st.integers(0, 3)) - def test_given_arbitrary_dims_list(self, data, ndims): - dims = dimension_names(min_dims=ndims, max_dims=ndims) - var = data.draw(variables(dims=dims)) + @given(st.data(), dimension_sizes()) + def test_given_fixed_dim_sizes(self, data, dim_sizes): + var = data.draw(variables(dims=st.just(dim_sizes))) # type: ignore[arg-type] - assert len(list(var.dims)) == ndims + assert var.dims == tuple(dim_sizes.keys()) + assert var.shape == tuple(dim_sizes.values()) - @given(st.data()) - def test_given_fixed_sizes(self, data): - dims = {"x": 3, "y": 4} - var = data.draw(variables(dims=st.just(dims))) # type: 
ignore[arg-type] - - assert var.dims == ("x", "y") - assert var.shape == (3, 4) - - @given(st.data()) - def test_given_fixed_dtype(self, data): - var = data.draw(variables(dtype=st.just(np.dtype("int32")))) - - assert var.dtype == np.dtype("int32") + @given(st.data(), supported_dtypes()) + def test_given_fixed_dtype(self, data, dtype): + var = data.draw(variables(dtype=st.just(dtype))) - @given(st.data()) - def test_given_fixed_data(self, data): - arr = np.asarray([[1, 2], [3, 4]]) + assert var.dtype == dtype + @given(st.data(), npst.arrays(shape=npst.array_shapes(), dtype=supported_dtypes())) + def test_given_fixed_data_dims_and_dtype(self, data, arr): def fixed_array_strategy_fn(*, shape=None, dtype=None): + """The fact this ignores shape and dtype is only okay because compatible shape & dtype will be passed separately.""" return st.just(arr) - var = data.draw( - variables( - array_strategy_fn=fixed_array_strategy_fn, dims=st.just({"x": 2, "y": 2}), dtype=st.just(arr.dtype) # type: ignore[arg-type] - ) - ) - - npt.assert_equal(var.data, arr) - assert var.dtype == arr.dtype - - @given(st.data()) - def test_given_fixed_dims_and_fixed_data(self, data): - dims = {"x": 2, "y": 2} - arr = np.asarray([[1, 2], [3, 4]]) - - def fixed_array_strategy_fn(*, shape=None, dtype=None): - return st.just(arr) + dim_names = data.draw(dimension_names(min_dims=arr.ndim, max_dims=arr.ndim)) + dim_sizes = {name: size for name, size in zip(dim_names, arr.shape)} var = data.draw( variables( array_strategy_fn=fixed_array_strategy_fn, - dims=st.just(dims), # type: ignore[arg-type] + dims=st.just(dim_sizes), dtype=st.just(arr.dtype), ) ) - assert var.sizes == dims npt.assert_equal(var.data, arr) + assert var.dtype == arr.dtype @given(st.data(), st.integers(min_value=0, max_value=3)) - def test_given_fixed_shape_arbitrary_dims_and_arbitrary_data(self, data, ndims): + def test_given_array_strat_arbitrary_size_and_arbitrary_data(self, data, ndims): dim_names = 
data.draw(dimension_names(min_dims=ndims, max_dims=ndims)) def array_strategy_fn(*, shape=None, dtype=None): From 58223907091bfd71906af035f1861402584fd567 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 11 Nov 2023 12:28:00 -0700 Subject: [PATCH 135/155] fix internal link to page on strategies --- doc/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index 856ebf10720..f4f6b3923d5 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1075,7 +1075,7 @@ Hypothesis Testing Strategies .. currentmodule:: xarray -See the :ref:`documentation page on testing ` for a guide on how to use these strategies. +See the :ref:`documentation page on testing ` for a guide on how to use these strategies. .. warning:: These strategies should be considered highly experimental, and liable to change at any time. From eeb6b32ea3271ee9c03c4562b35af1178d6dac57 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 11 Nov 2023 12:39:20 -0700 Subject: [PATCH 136/155] split up TypeError messages for each arg --- xarray/testing/strategies.py | 19 +++++++++++++------ xarray/tests/test_strategies.py | 8 ++++---- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 6b4715db10d..973172ba202 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -293,12 +293,19 @@ def variables( [-2.75034266e-225+2.22507386e-311j]]) """ - if any( - not isinstance(arg, st.SearchStrategy) and arg is not None - for arg in [dims, dtype, attrs] - ): + if not isinstance(dims, st.SearchStrategy) and dims is not None: raise TypeError( - "Contents dims, dtype, and attrs must each be provided as a hypothesis.strategies.SearchStrategy object (or None)." + f"dims must be provided as a hypothesis.strategies.SearchStrategy object (or None), but got type {type(dims)}. " + "To specify fixed contents, use hypothesis.strategies.just()." 
+ ) + if not isinstance(dtype, st.SearchStrategy) and dtype is not None: + raise TypeError( + f"dtype must be provided as a hypothesis.strategies.SearchStrategy object (or None), but got type {type(dtype)}. " + "To specify fixed contents, use hypothesis.strategies.just()." + ) + if not isinstance(attrs, st.SearchStrategy) and attrs is not None: + raise TypeError( + f"attrs must be provided as a hypothesis.strategies.SearchStrategy object (or None), but got type {type(attrs)}. " "To specify fixed contents, use hypothesis.strategies.just()." ) @@ -424,7 +431,7 @@ def unique_subset_of( ) if len(objs) == 0: - raise ValueError("Can't sample from a length-zero sequence.") + raise ValueError("Can't sample from a length-zero object.") keys = list(objs.keys()) if isinstance(objs, Mapping) else objs diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 9c110a45742..572a0afa2b8 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -96,13 +96,13 @@ def test_given_nothing(self, var): @given(st.data()) def test_given_incorrect_types(self, data): - with pytest.raises(TypeError, match="SearchStrategy object"): + with pytest.raises(TypeError, match="dims must be provided as a"): data.draw(variables(dims=["x", "y"])) # type: ignore[arg-type] - with pytest.raises(TypeError, match="SearchStrategy object"): + with pytest.raises(TypeError, match="dtype must be provided as a"): data.draw(variables(dtype=np.dtype("int32"))) # type: ignore[arg-type] - with pytest.raises(TypeError, match="SearchStrategy object"): + with pytest.raises(TypeError, match="attrs must be provided as a"): data.draw(variables(attrs=dict())) # type: ignore[arg-type] with pytest.raises(TypeError, match="Callable"): @@ -226,7 +226,7 @@ def test_invalid(self, data): with pytest.raises(TypeError, match="must be an Iterable or a Mapping"): data.draw(unique_subset_of(0)) - with pytest.raises(ValueError, match="length-zero sequence"): + with 
pytest.raises(ValueError, match="length-zero object"): data.draw(unique_subset_of({})) @given(st.data(), dimension_sizes(min_dims=1)) From e13c6ac225e915131f2317ea29416cf32fff3e6e Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 11 Nov 2023 12:41:59 -0700 Subject: [PATCH 137/155] use hypothesis.errors.InvalidArgument --- xarray/testing/strategies.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 973172ba202..7dd503a2f03 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -3,6 +3,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st +from hypothesis.errors import InvalidArgument import numpy as np import xarray as xr @@ -294,17 +295,17 @@ def variables( """ if not isinstance(dims, st.SearchStrategy) and dims is not None: - raise TypeError( + raise InvalidArgument( f"dims must be provided as a hypothesis.strategies.SearchStrategy object (or None), but got type {type(dims)}. " "To specify fixed contents, use hypothesis.strategies.just()." ) if not isinstance(dtype, st.SearchStrategy) and dtype is not None: - raise TypeError( + raise InvalidArgument( f"dtype must be provided as a hypothesis.strategies.SearchStrategy object (or None), but got type {type(dtype)}. " "To specify fixed contents, use hypothesis.strategies.just()." ) if not isinstance(attrs, st.SearchStrategy) and attrs is not None: - raise TypeError( + raise InvalidArgument( f"attrs must be provided as a hypothesis.strategies.SearchStrategy object (or None), but got type {type(attrs)}. " "To specify fixed contents, use hypothesis.strategies.just()." 
) @@ -314,7 +315,7 @@ def variables( if array_strategy_fn is None: _array_strategy_fn = smallish_arrays # type: ignore[assignment] elif not callable(array_strategy_fn): - raise TypeError( + raise InvalidArgument( "array_strategy_fn must be a Callable that accepts the kwargs dtype and shape and returns a hypothesis " "strategy which generates corresponding array-like objects." ) @@ -338,7 +339,7 @@ def variables( dim_names, _shape = list(_dims.keys()), tuple(_dims.values()) array_strategy = _array_strategy_fn(shape=_shape, dtype=_dtype) else: - raise TypeError( + raise InvalidArgument( f"Invalid type returned by dims strategy - drew an object of type {type(dims)}" ) else: From a169e1f31f00eb25fef3687e9fc0df78a14512fa Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 11 Nov 2023 12:49:10 -0700 Subject: [PATCH 138/155] generalize tests for generating specific number of dimensions --- xarray/testing/strategies.py | 2 +- xarray/tests/test_strategies.py | 20 +++++++++++--------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 7dd503a2f03..9b8cac5329a 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -3,8 +3,8 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st -from hypothesis.errors import InvalidArgument import numpy as np +from hypothesis.errors import InvalidArgument import xarray as xr from xarray.core.types import T_DuckArray diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 572a0afa2b8..69467c59e08 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -35,11 +35,12 @@ def test_types(self, dims): def test_unique(self, dims): assert len(set(dims)) == len(dims) - @given(st.data(), st.integers(min_value=0, max_value=3)) - def test_fixed_number_of_dims(self, data, ndims): - dim_names = data.draw(dimension_names(min_dims=ndims, max_dims=ndims)) + @given(st.data(), 
st.tuples(st.integers(0, 10), st.integers(0, 10)).map(sorted)) + def test_number_of_dims(self, data, ndims): + min_dims, max_dims = ndims + dim_names = data.draw(dimension_names(min_dims=min_dims, max_dims=max_dims)) assert isinstance(dim_names, list) - assert len(dim_names) == ndims + assert min_dims <= len(dim_names) <= max_dims class TestDimensionSizesStrategy: @@ -53,11 +54,12 @@ def test_types(self, dims): assert isinstance(n, int) assert n >= 0 - @given(st.data(), st.integers(min_value=0, max_value=3)) - def test_fixed_number_of_dims(self, data, ndims): - dim_sizes = data.draw(dimension_sizes(min_dims=ndims, max_dims=ndims)) + @given(st.data(), st.tuples(st.integers(0, 10), st.integers(0, 10)).map(sorted)) + def test_number_of_dims(self, data, ndims): + min_dims, max_dims = ndims + dim_sizes = data.draw(dimension_sizes(min_dims=min_dims, max_dims=max_dims)) assert isinstance(dim_sizes, dict) - assert len(dim_sizes) == ndims + assert min_dims <= len(dim_sizes) <= max_dims @given(st.data()) def test_restrict_names(self, data): @@ -147,7 +149,7 @@ def fixed_array_strategy_fn(*, shape=None, dtype=None): npt.assert_equal(var.data, arr) assert var.dtype == arr.dtype - @given(st.data(), st.integers(min_value=0, max_value=3)) + @given(st.data(), st.integers(0, 3)) def test_given_array_strat_arbitrary_size_and_arbitrary_data(self, data, ndims): dim_names = data.draw(dimension_names(min_dims=ndims, max_dims=ndims)) From 46b36b9c02936d34dca30ea13c851f67b0e9facb Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 11 Nov 2023 13:15:45 -0700 Subject: [PATCH 139/155] fix some typing errors --- xarray/core/types.py | 1 + xarray/testing/strategies.py | 34 ++++++++++++++++++--------------- xarray/tests/test_strategies.py | 6 +++--- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/xarray/core/types.py b/xarray/core/types.py index 1be5b00c43f..93e3e246575 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -175,6 +175,7 @@ def copy( # hopefully in 
the future we can narrow this down more: T_DuckArray = TypeVar("T_DuckArray", bound=Any) + ScalarOrArray = Union["ArrayLike", np.generic, np.ndarray, "DaskArray"] VarCompatible = Union["Variable", "ScalarOrArray"] DaCompatible = Union["DataArray", "VarCompatible"] diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 9b8cac5329a..8dd090cb676 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,5 +1,5 @@ from collections.abc import Hashable, Iterable, Mapping, Sequence -from typing import Any, Protocol, Union, overload +from typing import TYPE_CHECKING, Any, Protocol, Union, overload import hypothesis.extra.numpy as npst import hypothesis.strategies as st @@ -9,6 +9,10 @@ import xarray as xr from xarray.core.types import T_DuckArray +if TYPE_CHECKING: + from xarray.core.types import _DTypeLikeNested, _ShapeLike + + __all__ = [ "supported_dtypes", "names", @@ -20,12 +24,12 @@ ] -class ArrayStrategyFn(Protocol): +class ArrayStrategyFn(Protocol[T_DuckArray]): def __call__( self, *, - shape: Union[tuple[int, ...], None] = None, - dtype: Union[np.dtype, None] = None, + shape: "_ShapeLike", + dtype: "_DTypeLikeNested", **kwargs, ) -> st.SearchStrategy[T_DuckArray]: ... @@ -351,17 +355,17 @@ def variables( _data = draw(array_strategy) - if _data.shape != _shape: + if _data.shape != _shape: # type: ignore[attr-defined] raise ValueError( "array_strategy_fn returned an array object with a different shape than it was passed." - f"Passed {_shape}, but returned {_data.shape}." + f"Passed {_shape}, but returned {_data.shape}." # type: ignore[attr-defined] "Please either specify a consistent shape via the dims kwarg or ensure the array_strategy_fn callable " "obeys the shape argument passed to it." ) - if _data.dtype != _dtype: + if _data.dtype != _dtype: # type: ignore[attr-defined] raise ValueError( "array_strategy_fn returned an array object with a different dtype than it was passed." 
- f"Passed {_dtype}, but returned {_data.dtype}" + f"Passed {_dtype}, but returned {_data.dtype}" # type: ignore[attr-defined] "Please either specify a consistent dtype via the dtype kwarg or ensure the array_strategy_fn callable " "obeys the dtype argument passed to it." ) @@ -371,11 +375,11 @@ def variables( @overload def unique_subset_of( - objs: Iterable[Hashable], + objs: Sequence[Hashable], *, min_size: int = 0, max_size: Union[int, None] = None, -) -> st.SearchStrategy[Iterable[Hashable]]: +) -> st.SearchStrategy[Sequence[Hashable]]: ... @@ -392,21 +396,21 @@ def unique_subset_of( @st.composite def unique_subset_of( draw: st.DrawFn, - objs: Union[Iterable[Hashable], Mapping[Hashable, Any]], + objs: Union[Sequence[Hashable], Mapping[Hashable, Any]], *, min_size: int = 0, max_size: Union[int, None] = None, -) -> Union[Iterable[Hashable], Mapping[Hashable, Any]]: +) -> Union[Sequence[Hashable], Mapping[Hashable, Any]]: """ - Return a strategy which generates a unique subset of the given objs. + Return a strategy which generates a unique subset of the given objects. - Each entry in the output subset will be unique (if input was an iterable) or have a unique key (if it was a mapping). + Each entry in the output subset will be unique (if input was a sequence) or have a unique key (if it was a mapping). Requires the hypothesis package to be installed. Parameters ---------- - objs: Union[Iterable[Hashable], Mapping[Hashable, Any]] + objs: Union[Sequence[Hashable], Mapping[Hashable, Any]] Objects from which to sample to produce the subset. min_size: int, optional Minimum size of the returned subset. Default is 0. 
diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 69467c59e08..92c7ac2c723 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -69,7 +69,7 @@ def test_restrict_names(self, data): assert dim.upper() == dim -def check_dict_values(dictionary: dict, allowed_attrs_values_types: set) -> bool: +def check_dict_values(dictionary: dict, allowed_attrs_values_types) -> bool: """Helper function to assert that all values in recursive dict match one of a set of types.""" for key, value in dictionary.items(): if isinstance(value, allowed_attrs_values_types) or value is None: @@ -118,7 +118,7 @@ def test_given_fixed_dim_names(self, data, fixed_dim_names): @given(st.data(), dimension_sizes()) def test_given_fixed_dim_sizes(self, data, dim_sizes): - var = data.draw(variables(dims=st.just(dim_sizes))) # type: ignore[arg-type] + var = data.draw(variables(dims=st.just(dim_sizes))) assert var.dims == tuple(dim_sizes.keys()) assert var.shape == tuple(dim_sizes.values()) @@ -226,7 +226,7 @@ class TestUniqueSubsetOf: @given(st.data()) def test_invalid(self, data): with pytest.raises(TypeError, match="must be an Iterable or a Mapping"): - data.draw(unique_subset_of(0)) + data.draw(unique_subset_of(0)) # type: ignore[call-overload] with pytest.raises(ValueError, match="length-zero object"): data.draw(unique_subset_of({})) From 00ed3d619ba032476df5d3bc96234aed6d622050 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 11 Nov 2023 21:21:29 -0500 Subject: [PATCH 140/155] test that reduction example in docs actually works --- doc/user-guide/testing.rst | 10 +++------- xarray/tests/test_strategies.py | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index bed161cf742..032b733674b 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -291,16 +291,12 @@ along any possible valid subset of the Variable's 
dimensions. import numpy.testing as npt - @given(st.data()) - def test_mean(data): + @given(st.data(), xrst.variables(dims=xrst.dimension_names(min_dims=1))) + def test_mean(data, var): """Test that the mean of an xarray Variable is always equal to the mean of the underlying array.""" - # create arbitrary data - array_dims = data.draw(dimension_names(min_dims=1)) - var = data.draw(variables(dims=st.just(array_dims))) - # specify arbitrary reduction along at least one dimension - reduction_dims = data.draw(xrst.unique_subset_of(array_dims, min_size=1)) + reduction_dims = data.draw(xrst.unique_subset_of(var.dims, min_size=1)) # create expected result (using nanmean because arrays with Nans will be generated) reduction_axes = tuple(var.get_axis_num(dim) for dim in reduction_dims) diff --git a/xarray/tests/test_strategies.py b/xarray/tests/test_strategies.py index 92c7ac2c723..44f0d56cde8 100644 --- a/xarray/tests/test_strategies.py +++ b/xarray/tests/test_strategies.py @@ -245,3 +245,27 @@ def test_iterable(self, data, dim_names): for dim in subset_of_dim_names: assert dim in dim_names + + +class TestReduction: + """ + These tests are for checking that the examples given in the docs page on testing actually work. + """ + + @given(st.data(), variables(dims=dimension_names(min_dims=1))) + def test_mean(self, data, var): + """ + Test that given a Variable of at least one dimension, + the mean of the Variable is always equal to the mean of the underlying array. 
+ """ + + # specify arbitrary reduction along at least one dimension + reduction_dims = data.draw(unique_subset_of(var.dims, min_size=1)) + + # create expected result (using nanmean because arrays with Nans will be generated) + reduction_axes = tuple(var.get_axis_num(dim) for dim in reduction_dims) + expected = np.nanmean(var.data, axis=reduction_axes) + + # assert property is always satisfied + result = var.mean(dim=reduction_dims).data + npt.assert_equal(expected, result) From d265ddbe6809de34033a3550d15347eec8cb7317 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 11 Nov 2023 22:39:16 -0500 Subject: [PATCH 141/155] fix typing errors --- xarray/core/types.py | 2 +- xarray/testing/strategies.py | 12 +++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/xarray/core/types.py b/xarray/core/types.py index 93e3e246575..02245ecd1de 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -173,7 +173,7 @@ def copy( # Temporary placeholder for indicating an array api compliant type. # hopefully in the future we can narrow this down more: -T_DuckArray = TypeVar("T_DuckArray", bound=Any) +T_DuckArray = TypeVar("T_DuckArray", bound=Any, covariant=True) ScalarOrArray = Union["ArrayLike", np.generic, np.ndarray, "DaskArray"] diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 8dd090cb676..95a9665db3d 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -30,7 +30,6 @@ def __call__( *, shape: "_ShapeLike", dtype: "_DTypeLikeNested", - **kwargs, ) -> st.SearchStrategy[T_DuckArray]: ... @@ -231,7 +230,7 @@ def variables( Parameters ---------- array_strategy_fn: Callable which returns a strategy generating array-likes, optional - Callable must accept shape and dtype kwargs, and must generate results consistent with its input. + Callable must only accept shape and dtype kwargs, and must generate results consistent with its input. 
If not passed the default is to generate a small numpy array with one of the supported_dtypes. dims: Strategy for generating the dimensions, optional Can either be a strategy for generating a sequence of string dimension names, @@ -315,7 +314,6 @@ def variables( ) _array_strategy_fn: ArrayStrategyFn - array_strategy: st.SearchStrategy[T_DuckArray] if array_strategy_fn is None: _array_strategy_fn = smallish_arrays # type: ignore[assignment] elif not callable(array_strategy_fn): @@ -355,17 +353,17 @@ def variables( _data = draw(array_strategy) - if _data.shape != _shape: # type: ignore[attr-defined] + if _data.shape != _shape: raise ValueError( "array_strategy_fn returned an array object with a different shape than it was passed." - f"Passed {_shape}, but returned {_data.shape}." # type: ignore[attr-defined] + f"Passed {_shape}, but returned {_data.shape}." "Please either specify a consistent shape via the dims kwarg or ensure the array_strategy_fn callable " "obeys the shape argument passed to it." ) - if _data.dtype != _dtype: # type: ignore[attr-defined] + if _data.dtype != _dtype: raise ValueError( "array_strategy_fn returned an array object with a different dtype than it was passed." - f"Passed {_dtype}, but returned {_data.dtype}" # type: ignore[attr-defined] + f"Passed {_dtype}, but returned {_data.dtype}" "Please either specify a consistent dtype via the dtype kwarg or ensure the array_strategy_fn callable " "obeys the dtype argument passed to it." ) From 0e872a813d223b66cb89549de48e6232d4394d1c Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Sat, 11 Nov 2023 22:39:32 -0500 Subject: [PATCH 142/155] simply generation of sparse arrays in example --- doc/user-guide/testing.rst | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 032b733674b..fc8ef5ee0d2 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -178,15 +178,14 @@ different type: .. 
ipython:: python def convert_to_sparse(var): - if var.ndim == 0: - return var - else: - var.data = sparse.COO.from_numpy(var.to_numpy()) - return var + var.data = sparse.COO.from_numpy(var.to_numpy()) + return var .. ipython:: python - sparse_variables = xrst.variables().map(convert_to_sparse) + sparse_variables = xrst.variables(dims=dimension_names(min_dims=1)).map( + convert_to_sparse + ) sparse_variables.example() sparse_variables.example() From 3d43ed6dfa4e0f6578fedead8a38db8361c3a10e Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 13 Nov 2023 11:26:27 -0500 Subject: [PATCH 143/155] fix impot in docs example --- doc/user-guide/testing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index fc8ef5ee0d2..a15b3ac0a09 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -183,7 +183,7 @@ different type: .. ipython:: python - sparse_variables = xrst.variables(dims=dimension_names(min_dims=1)).map( + sparse_variables = xrst.variables(dims=xrst.dimension_names(min_dims=1)).map( convert_to_sparse ) From bdf3aed807bb15dcd049e8e30dfeb3622113de8e Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 13 Nov 2023 12:17:22 -0500 Subject: [PATCH 144/155] correct type hints in sparse example --- doc/user-guide/testing.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index a15b3ac0a09..8895c6f6ddf 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -196,8 +196,8 @@ different type: @st.composite def sparse_random_arrays( - draw, shape: tuple[int] = None - ) -> st.SearchStrategy[sparse._coo.core.COO]: + draw, shape: tuple[int] + ) -> sparse._coo.core.COO: """Strategy which generates random sparse.COO arrays""" if shape is None: shape = draw(npst.array_shapes()) @@ -208,7 +208,7 @@ different type: def sparse_random_arrays_fn( - *, shape: tuple[int] = None, dtype: np.dtype = 
None + *, shape: tuple[int, ...], dtype: np.dtype ) -> st.SearchStrategy[sparse._coo.core.COO]: return sparse_random_arrays(shape=shape) From afd526d7953b5607ee6b050593521955063ab93d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 13 Nov 2023 17:18:05 +0000 Subject: [PATCH 145/155] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/user-guide/testing.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 8895c6f6ddf..266f9d40b41 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -195,9 +195,7 @@ different type: .. ipython:: python @st.composite - def sparse_random_arrays( - draw, shape: tuple[int] - ) -> sparse._coo.core.COO: + def sparse_random_arrays(draw, shape: tuple[int]) -> sparse._coo.core.COO: """Strategy which generates random sparse.COO arrays""" if shape is None: shape = draw(npst.array_shapes()) From 6bbd13b1da7e619ff9d8426160283f4c08f5f8d4 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Mon, 13 Nov 2023 13:19:18 -0700 Subject: [PATCH 146/155] Use .copy in convert_to_sparse Co-authored-by: Justus Magin --- doc/user-guide/testing.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index 266f9d40b41..f2297ef9e17 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -178,8 +178,7 @@ different type: .. ipython:: python def convert_to_sparse(var): - var.data = sparse.COO.from_numpy(var.to_numpy()) - return var + return var.copy(data=sparse.COO.from_numpy(var.to_numpy())) .. 
ipython:: python From 29ecd7d6ed2b5d7e26d3eda250ab0c77d3db9f93 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Mon, 13 Nov 2023 13:37:58 -0700 Subject: [PATCH 147/155] Use st.builds in sparse example Co-authored-by: Justus Magin --- doc/user-guide/testing.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/user-guide/testing.rst b/doc/user-guide/testing.rst index f2297ef9e17..13279eccb0b 100644 --- a/doc/user-guide/testing.rst +++ b/doc/user-guide/testing.rst @@ -193,15 +193,15 @@ different type: .. ipython:: python - @st.composite - def sparse_random_arrays(draw, shape: tuple[int]) -> sparse._coo.core.COO: + def sparse_random_arrays(shape: tuple[int]) -> sparse._coo.core.COO: """Strategy which generates random sparse.COO arrays""" if shape is None: - shape = draw(npst.array_shapes()) - density = draw(st.integers(min_value=0, max_value=1)) - return sparse.random( - shape=shape, density=density - ) # note sparse.random does not accept a dtype kwarg + shape = npst.array_shapes() + else: + shape = st.just(shape) + density = st.integers(min_value=0, max_value=1) + # note sparse.random does not accept a dtype kwarg + return st.builds(sparse.random, shape=shape, density=density) def sparse_random_arrays_fn( From 631e8106c997e54b16f1afc3694c9723018b4aac Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 13 Nov 2023 15:41:18 -0500 Subject: [PATCH 148/155] correct intersphinx link in whatsnew --- doc/whats-new.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 96364b91547..04d4a9d78e8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,10 +23,9 @@ New Features ~~~~~~~~~~~~ - Added hypothesis strategies for generating :py:class:`xarray.Variable` objects containing arbitrary data, useful for parametrizing downstream tests. - Accessible under :py:func:`testing.strategies`, and documented in a new page on testing in the User Guide. 
+ Accessible under :py:mod:`testing.strategies`, and documented in a new page on testing in the User Guide. (:issue:`6911`, :pull:`8404`) By `Tom Nicholas `_. - - Use `opt_einsum `_ for :py:func:`xarray.dot` by default if installed. By `Deepak Cherian `_. (:issue:`7764`, :pull:`8373`). - Add ``DataArray.dt.total_seconds()`` method to match the Pandas API. (:pull:`8435`). From 4412d9886cc498eaf6febbabaec6d88b316e277a Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 13 Nov 2023 16:03:05 -0500 Subject: [PATCH 149/155] rename module containing assertion functions --- xarray/testing/__init__.py | 2 +- xarray/testing/{testing.py => assertions.py} | 0 xarray/tests/{test_testing.py => test_assertions.py} | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename xarray/testing/{testing.py => assertions.py} (100%) rename xarray/tests/{test_testing.py => test_assertions.py} (100%) diff --git a/xarray/testing/__init__.py b/xarray/testing/__init__.py index 7f35a2b2be2..ab2f8ba4357 100644 --- a/xarray/testing/__init__.py +++ b/xarray/testing/__init__.py @@ -1,4 +1,4 @@ -from xarray.testing.testing import ( # noqa: F401 +from xarray.testing.assertions import ( # noqa: F401 _assert_dataarray_invariants, _assert_dataset_invariants, _assert_indexes_invariants_checks, diff --git a/xarray/testing/testing.py b/xarray/testing/assertions.py similarity index 100% rename from xarray/testing/testing.py rename to xarray/testing/assertions.py diff --git a/xarray/tests/test_testing.py b/xarray/tests/test_assertions.py similarity index 100% rename from xarray/tests/test_testing.py rename to xarray/tests/test_assertions.py From 1ea0dcf1995c64a26baa0da8feeaea78eb2449aa Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 13 Nov 2023 16:25:02 -0500 Subject: [PATCH 150/155] clarify sentence --- xarray/testing/strategies.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 95a9665db3d..9bfd9a45e60 100644 --- 
a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -38,9 +38,8 @@ def supported_dtypes() -> st.SearchStrategy[np.dtype]: """ Generates only those numpy dtypes which xarray can handle. - Avoiding using hypothesis.extra.numpy.scalar_dtypes is required to exclude weirder dtypes e.g. unicode, byte_string, array, or nested dtypes. - Also required to dodge bugs with pandas non-nanosecond datetime overflows. - Note does not generate all dtypes that xarray can handle - just only generates dtypes which it definitely can. + Use instead of hypothesis.extra.numpy.scalar_dtypes in order to exclude weirder dtypes such as unicode, byte_string, array, or nested dtypes. + Also excludes datetimes, which dodges bugs with pandas non-nanosecond datetime overflows. Requires the hypothesis package to be installed. """ From cf1a45e3847bcfacf2fd9d7c19e70db115df8c8e Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Mon, 13 Nov 2023 16:29:03 -0500 Subject: [PATCH 151/155] add general ImportError if hypothesis not installed --- xarray/testing/strategies.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 9bfd9a45e60..d8434bed68c 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -1,8 +1,14 @@ from collections.abc import Hashable, Iterable, Mapping, Sequence from typing import TYPE_CHECKING, Any, Protocol, Union, overload +try: + import hypothesis.strategies as st +except ImportError as e: + raise ImportError( + "`xarray.stesting.strategies` requires `hypothesis` to be installed." 
+ ) from e + import hypothesis.extra.numpy as npst -import hypothesis.strategies as st import numpy as np from hypothesis.errors import InvalidArgument From ea738cdd3b28a99e254c92d9a941077f9e9b0a31 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 14 Nov 2023 10:20:53 -0500 Subject: [PATCH 152/155] add See Also link to strategies docs page from docstring of every strategy --- xarray/testing/strategies.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index d8434bed68c..a8f2bd35bf5 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -48,6 +48,10 @@ def supported_dtypes() -> st.SearchStrategy[np.dtype]: Also excludes datetimes, which dodges bugs with pandas non-nanosecond datetime overflows. Requires the hypothesis package to be installed. + + See Also + -------- + :ref:`testing.hypothesis`_ """ # TODO should this be exposed publicly? # We should at least decide what the set of numpy dtypes that xarray officially supports is. @@ -84,6 +88,10 @@ def smallish_arrays( elements fill unique + + See Also + -------- + :ref:`testing.hypothesis`_ """ # TODO here we may also wish to generalize/restrict the dtypes produced by xarray's default test strategies return npst.arrays( @@ -102,6 +110,10 @@ def names() -> st.SearchStrategy[str]: Generates arbitrary string names for dimensions / variables. Requires the hypothesis package to be installed. + + See Also + -------- + :ref:`testing.hypothesis`_ """ return st.text( _readable_characters, @@ -166,6 +178,10 @@ def dimension_sizes( max_side: int, optional Minimum size of a dimension. Default is `min_length` + 5. + + See Also + -------- + :ref:`testing.hypothesis`_ """ if max_side is None: @@ -201,6 +217,10 @@ def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: The generated dictionaries can potentially be recursive. Requires the hypothesis package to be installed. 
+ + See Also + -------- + :ref:`testing.hypothesis`_ """ return st.recursive( st.dictionaries(_attr_keys, _attr_values), @@ -300,6 +320,10 @@ def variables( array([[-1.00000000e+007+3.40282347e+038j], [-2.75034266e-225+2.22507386e-311j]]) + + See Also + -------- + :ref:`testing.hypothesis`_ """ if not isinstance(dims, st.SearchStrategy) and dims is not None: @@ -432,6 +456,10 @@ def unique_subset_of( {'y': 3} >>> unique_subset_of(["x", "y"]).example() # doctest: +SKIP ['x'] + + See Also + -------- + :ref:`testing.hypothesis`_ """ if not isinstance(objs, Iterable): raise TypeError( From 79b009497803f74871da475a6b6ef59ce025d971 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 14 Nov 2023 10:23:47 -0500 Subject: [PATCH 153/155] typo in ImportError message --- xarray/testing/strategies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index a8f2bd35bf5..4c74719ea97 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -5,7 +5,7 @@ import hypothesis.strategies as st except ImportError as e: raise ImportError( - "`xarray.stesting.strategies` requires `hypothesis` to be installed." + "`xarray.testing.strategies` requires `hypothesis` to be installed." 
) from e import hypothesis.extra.numpy as npst From cbcd486d5938037daef635c229e417f33709d155 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 5 Dec 2023 00:12:37 -0500 Subject: [PATCH 154/155] remove extra blank lines in examples --- xarray/testing/strategies.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 4c74719ea97..8bf8374584a 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -292,17 +292,14 @@ def variables( array([[[-10000000., -10000000.], [-10000000., -10000000.]], - [[-10000000., -10000000.], [ 0., -10000000.]], - [[ 0., -10000000.], [-10000000., inf]], - [[ -0., -10000000.], [-10000000., -0.]]], dtype=float32) Attributes: - śřĴ: {'ĉ': {'iĥſ': array([-30117, -1740], dtype=int16)}} + śřĴ: {'ĉ': {'iĥf': array([-30117, -1740], dtype=int16)}} Generate only Variable objects with certain dimension names: From 69ddd08281785189e921874810a78ae550857600 Mon Sep 17 00:00:00 2001 From: TomNicholas Date: Tue, 5 Dec 2023 17:10:54 -0500 Subject: [PATCH 155/155] remove smallish_arrays --- xarray/testing/strategies.py | 39 ++---------------------------------- 1 file changed, 2 insertions(+), 37 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 8bf8374584a..d08cbc0b584 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -63,42 +63,6 @@ def supported_dtypes() -> st.SearchStrategy[np.dtype]: ) -def smallish_arrays( - shape: Union[ - tuple[int, ...], st.SearchStrategy[tuple[int, ...]] - ] = npst.array_shapes(min_dims=0, max_dims=3, min_side=0, max_side=4), - dtype: Union[np.dtype, st.SearchStrategy[np.dtype]] = supported_dtypes(), - *, - elements=None, - fill=None, - unique=False, -) -> st.SearchStrategy[np.ndarray]: - """ - Generates arbitrary array API-compliant numpy arrays. 
- - By default generates arrays with no more than 4 elements per axis for performance, using supported_dtypes. - - Requires the hypothesis package to be installed. - - Parameters - ---------- - shape - dtype - Default is to use any of the scalar dtypes defined in the array API standard. - elements - fill - unique - - See Also - -------- - :ref:`testing.hypothesis`_ - """ - # TODO here we may also wish to generalize/restrict the dtypes produced by xarray's default test strategies - return npst.arrays( - dtype=dtype, shape=shape, elements=elements, fill=fill, unique=unique - ) - - # TODO Generalize to all valid unicode characters once formatting bugs in xarray's reprs are fixed + docs can handle it. _readable_characters = st.characters( categories=["L", "N"], max_codepoint=0x017F @@ -341,7 +305,8 @@ def variables( _array_strategy_fn: ArrayStrategyFn if array_strategy_fn is None: - _array_strategy_fn = smallish_arrays # type: ignore[assignment] + # For some reason if I move the default value to the function signature definition mypy incorrectly says the ignore is no longer necessary, making it impossible to satisfy mypy + _array_strategy_fn = npst.arrays # type: ignore[assignment] # npst.arrays has extra kwargs that we aren't using later elif not callable(array_strategy_fn): raise InvalidArgument( "array_strategy_fn must be a Callable that accepts the kwargs dtype and shape and returns a hypothesis "