diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index df81315ba486..91351d2fd13a 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -40,6 +40,7 @@ env: third_party/cupy/linalg_tests/test_norms.py third_party/cupy/linalg_tests/test_product.py third_party/cupy/linalg_tests/test_solve.py + third_party/cupy/linalg_tests/test_einsum.py third_party/cupy/logic_tests/test_comparison.py third_party/cupy/logic_tests/test_truth.py third_party/cupy/manipulation_tests/test_basic.py diff --git a/doc/known_words.txt b/doc/known_words.txt index 34e9c2aeb631..758594b6fa3e 100644 --- a/doc/known_words.txt +++ b/doc/known_words.txt @@ -5,7 +5,9 @@ broadcastable broadcasted byteorder Cholesky +combinatorially conda +cubically Decompositions dimensionality docstring @@ -22,6 +24,7 @@ finfo finiteness Fortran Frobenius +Hadamard Hypergeometric iinfo Infs diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index ad579af58067..b65efa9b9938 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -580,14 +580,14 @@ def astype(self, dtype, order="K", casting="unsafe", subok=True, copy=True): If it is False and no cast happens, then this method returns the array itself. Otherwise, a copy is returned. casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional - Controls what kind of data casting may occur. Defaults to 'unsafe' for backwards compatibility. - + Controls what kind of data casting may occur. + Defaults to ``'unsafe'`` for backwards compatibility. - 'no' means the data types should not be cast at all. - 'equiv' means only byte-order changes are allowed. - 'safe' means only casts which can preserve values are allowed. - - 'same_kind' means only safe casts or casts within a kind, like float64 to float32, are allowed. + - 'same_kind' means only safe casts or casts within a kind, like + float64 to float32, are allowed. - 'unsafe' means any data conversions may be done. - copy : bool, optional By default, ``astype`` always returns a newly allocated array. If this is set to ``False``, and the `dtype`, `order`, and `subok` diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 19470a8a14bf..9111fd0b50bb 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -331,7 +331,7 @@ def check_supported_arrays_type(*arrays, scalar_type=False, all_scalars=False): f"An array must be any of supported type, but got {type(a)}" ) - if len(arrays) > 1 and not (all_scalars or any_is_array): + if len(arrays) > 0 and not (all_scalars or any_is_array): raise TypeError( "At least one input must be of supported array type, " "but got all scalars." diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py index 32741ff0edec..57dbf0283c57 100644 --- a/dpnp/dpnp_iface_linearalgebra.py +++ b/dpnp/dpnp_iface_linearalgebra.py @@ -49,6 +49,7 @@ ) from .dpnp_utils.dpnp_utils_linearalgebra import ( dpnp_dot, + dpnp_einsum, dpnp_kron, dpnp_matmul, ) @@ -122,11 +123,11 @@ def dot(a, b, out=None): array([[4, 1], [2, 2]]) - >>> a = np.arange(3*4*5*6).reshape((3,4,5,6)) - >>> b = np.arange(3*4*5*6)[::-1].reshape((5,4,6,3)) - >>> np.dot(a, b)[2,3,2,1,2,2] + >>> a = np.arange(3 * 4 * 5 * 6).reshape((3, 4, 5, 6)) + >>> b = np.arange(3 * 4 * 5 * 6)[::-1].reshape((5, 4, 6, 3)) + >>> np.dot(a, b)[2, 3, 2, 1, 2, 2] array(499128) - >>> sum(a[2,3,2,:] * b[1,2,:,2]) + >>> sum(a[2, 3, 2, :] * b[1, 2, :, 2]) array(499128) """ @@ -166,15 +167,61 @@ def dot(a, b, out=None): return dpnp.get_result_array(result, out, casting="no") -def einsum(*args, **kwargs): +def einsum( + *operands, out=None, dtype=None, order="K", casting="safe", optimize=False +): """ + einsum(subscripts, *operands, out=None, dtype=None, order="K", \ + casting="safe", optimize=False) + Evaluates the Einstein summation convention on the operands. For full documentation refer to :obj:`numpy.einsum`. - Limitations - ----------- - Function is executed sequentially on CPU. + Parameters + ---------- + subscripts : str + Specifies the subscripts for summation as comma separated list of + subscript labels. An implicit (classical Einstein summation) + calculation is performed unless the explicit indicator '->' is + included as well as subscript labels of the precise output form. + *operands : sequence of {dpnp.ndarrays, usm_ndarray} + These are the arrays for the operation. + out : {dpnp.ndarrays, usm_ndarray, None}, optional + If provided, the calculation is done into this array. + dtype : {dtype, None}, optional + If provided, forces the calculation to use the data type specified. + Default is ``None``. + order : {"C", "F", "A", "K"}, optional + Controls the memory layout of the output. ``"C"`` means it should be + C-contiguous. ``"F"`` means it should be F-contiguous, ``"A"`` means + it should be ``"F"`` if the inputs are all ``"F"``, ``"C"`` otherwise. + ``"K"`` means it should be as close to the layout as the inputs as + is possible, including arbitrarily permuted axes. + Default is ``"K"``. + casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional + Controls what kind of data casting may occur. Setting this to + ``"unsafe"`` is not recommended, as it can adversely affect + accumulations. + + * ``"no"`` means the data types should not be cast at all. + * ``"equiv"`` means only byte-order changes are allowed. + * ``"safe"`` means only casts which can preserve values are allowed. + * ``"same_kind"`` means only safe casts or casts within a kind, + like float64 to float32, are allowed. + * ``"unsafe"`` means any data conversions may be done. + + Default is ``"safe"``. + optimize : {False, True, "greedy", "optimal"}, optional + Controls if intermediate optimization should occur. No optimization + will occur if ``False`` and ``True`` will default to the ``"greedy"`` + algorithm. Also accepts an explicit contraction list from the + :obj:`dpnp.einsum_path` function. Default is ``False``. + + Returns + ------- + out : dpnp.ndarray + The calculation based on the Einstein summation convention. See Also ------- @@ -183,36 +230,323 @@ def einsum(*args, **kwargs): :obj:`dpnp.dot` : Returns the dot product of two arrays. :obj:`dpnp.inner` : Returns the inner product of two arrays. :obj:`dpnp.outer` : Returns the outer product of two arrays. + :obj:`dpnp.tensordot` : Sum products over arbitrary axes. + :obj:`dpnp.linalg.multi_dot` : Chained dot product. + + Examples + -------- + >>> import dpnp as np + >>> a = np.arange(25).reshape(5,5) + >>> b = np.arange(5) + >>> c = np.arange(6).reshape(2,3) + + Trace of a matrix: + + >>> np.einsum("ii", a) + array(60) + >>> np.einsum(a, [0,0]) + array(60) + >>> np.trace(a) + array(60) + + Extract the diagonal (requires explicit form): + + >>> np.einsum("ii->i", a) + array([ 0, 6, 12, 18, 24]) + >>> np.einsum(a, [0, 0], [0]) + array([ 0, 6, 12, 18, 24]) + >>> np.diag(a) + array([ 0, 6, 12, 18, 24]) + + Sum over an axis (requires explicit form): + + >>> np.einsum("ij->i", a) + array([ 10, 35, 60, 85, 110]) + >>> np.einsum(a, [0, 1], [0]) + array([ 10, 35, 60, 85, 110]) + >>> np.sum(a, axis=1) + array([ 10, 35, 60, 85, 110]) + + For higher dimensional arrays summing a single axis can be done + with ellipsis: + + >>> np.einsum("...j->...", a) + array([ 10, 35, 60, 85, 110]) + >>> np.einsum(a, [Ellipsis,1], [Ellipsis]) + array([ 10, 35, 60, 85, 110]) + + Compute a matrix transpose, or reorder any number of axes: + + >>> np.einsum("ji", c) + array([[0, 3], + [1, 4], + [2, 5]]) + >>> np.einsum("ij->ji", c) + array([[0, 3], + [1, 4], + [2, 5]]) + >>> np.einsum(c, [1, 0]) + array([[0, 3], + [1, 4], + [2, 5]]) + >>> np.transpose(c) + array([[0, 3], + [1, 4], + [2, 5]]) + + Vector inner products: + + >>> np.einsum("i,i", b, b) + array(30) + >>> np.einsum(b, [0], b, [0]) + array(30) + >>> np.inner(b,b) + array(30) + + Matrix vector multiplication: + + >>> np.einsum("ij,j", a, b) + array([ 30, 80, 130, 180, 230]) + >>> np.einsum(a, [0,1], b, [1]) + array([ 30, 80, 130, 180, 230]) + >>> np.dot(a, b) + array([ 30, 80, 130, 180, 230]) + >>> np.einsum("...j,j", a, b) + array([ 30, 80, 130, 180, 230]) + + Broadcasting and scalar multiplication: + + >>> np.einsum("..., ...", 3, c) + array([[ 0, 3, 6], + [ 9, 12, 15]]) + >>> np.einsum(",ij", 3, c) + array([[ 0, 3, 6], + [ 9, 12, 15]]) + >>> np.einsum(3, [Ellipsis], c, [Ellipsis]) + array([[ 0, 3, 6], + [ 9, 12, 15]]) + >>> np.multiply(3, c) + array([[ 0, 3, 6], + [ 9, 12, 15]]) + + Vector outer product: + + >>> np.einsum("i,j", np.arange(2)+1, b) + array([[0, 1, 2, 3, 4], + [0, 2, 4, 6, 8]]) + >>> np.einsum(np.arange(2)+1, [0], b, [1]) + array([[0, 1, 2, 3, 4], + [0, 2, 4, 6, 8]]) + >>> np.outer(np.arange(2)+1, b) + array([[0, 1, 2, 3, 4], + [0, 2, 4, 6, 8]]) + + Tensor contraction: + + >>> a = np.arange(60.).reshape(3, 4, 5) + >>> b = np.arange(24.).reshape(4, 3, 2) + >>> np.einsum("ijk,jil->kl", a, b) + array([[4400., 4730.], + [4532., 4874.], + [4664., 5018.], + [4796., 5162.], + [4928., 5306.]]) + >>> np.einsum(a, [0, 1, 2], b, [1, 0, 3], [2, 3]) + array([[4400., 4730.], + [4532., 4874.], + [4664., 5018.], + [4796., 5162.], + [4928., 5306.]]) + >>> np.tensordot(a, b, axes=([1, 0],[0, 1])) + array([[4400., 4730.], + [4532., 4874.], + [4664., 5018.], + [4796., 5162.], + [4928., 5306.]]) + + Example of ellipsis use: + + >>> a = np.arange(6).reshape((3, 2)) + >>> b = np.arange(12).reshape((4, 3)) + >>> np.einsum("ki,jk->ij", a, b) + array([[10, 28, 46, 64], + [13, 40, 67, 94]]) + >>> np.einsum("ki,...k->i...", a, b) + array([[10, 28, 46, 64], + [13, 40, 67, 94]]) + >>> np.einsum("k...,jk", a, b) + array([[10, 28, 46, 64], + [13, 40, 67, 94]]) + + Chained array operations. For more complicated contractions, speed ups + might be achieved by repeatedly computing a "greedy" path or computing + the "optimal" path in advance and repeatedly applying it, using an + `einsum_path` insertion. Performance improvements can be particularly + significant with larger arrays: + + >>> a = np.ones(64000).reshape(20, 40, 80) + + Basic `einsum`: 119 ms ± 26 ms per loop (evaluated on 12th + Gen Intel\u00AE Core\u2122 i7 processor) + + >>> %timeit np.einsum("ijk,ilm,njm,nlk,abc->",a,a,a,a,a) + + Sub-optimal `einsum`: 32.9 ms ± 5.1 ms per loop + + >>> %timeit np.einsum("ijk,ilm,njm,nlk,abc->",a,a,a,a,a, optimize="optimal") + + Greedy `einsum`: 28.6 ms ± 4.8 ms per loop + + >>> %timeit np.einsum("ijk,ilm,njm,nlk,abc->",a,a,a,a,a, optimize="greedy") + + Optimal `einsum`: 26.9 ms ± 6.3 ms per loop + + >>> path = np.einsum_path( + "ijk,ilm,njm,nlk,abc->",a,a,a,a,a, optimize="optimal" + )[0] + >>> %timeit np.einsum("ijk,ilm,njm,nlk,abc->",a,a,a,a,a, optimize=path) """ - return call_origin(numpy.einsum, *args, **kwargs) + if optimize is True: + optimize = "greedy" + + return dpnp_einsum( + *operands, + out=out, + dtype=dtype, + order=order, + casting=casting, + optimize=optimize, + ) -def einsum_path(*args, **kwargs): +def einsum_path(*operands, optimize="greedy", einsum_call=False): """ - einsum_path(subscripts, *operands, optimize='greedy') + einsum_path(subscripts, *operands, optimize="greedy") Evaluates the lowest cost contraction order for an einsum expression by considering the creation of intermediate arrays. For full documentation refer to :obj:`numpy.einsum_path`. - Limitations - ----------- - Function is executed sequentially on CPU. + Parameters + ---------- + subscripts : str + Specifies the subscripts for summation. + *operands : sequence of arrays + These are the arrays for the operation in any form that can be + converted to an array. This includes scalars, lists, lists of + tuples, tuples, tuples of tuples, tuples of lists, and ndarrays. + optimize : {bool, list, tuple, None, "greedy", "optimal"} + Choose the type of path. If a tuple is provided, the second argument is + assumed to be the maximum intermediate size created. If only a single + argument is provided the largest input or output array size is used + as a maximum intermediate size. + + * if a list is given that starts with ``einsum_path``, uses this as the + contraction path + * if ``False`` or ``None`` no optimization is taken + * if ``True`` defaults to the "greedy" algorithm + * ``"optimal"`` is an algorithm that combinatorially explores all + possible ways of contracting the listed tensors and chooses the + least costly path. Scales exponentially with the number of terms + in the contraction. + * ``"greedy"`` is an algorithm that chooses the best pair contraction + at each step. Effectively, this algorithm searches the largest inner, + Hadamard, and then outer products at each step. Scales cubically with + the number of terms in the contraction. Equivalent to the + ``"optimal"`` path for most contractions. + + Default is ``"greedy"``. + + Returns + ------- + path : list of tuples + A list representation of the einsum path. + string_repr : str + A printable representation of the einsum path. + + Notes + ----- + The resulting path indicates which terms of the input contraction should be + contracted first, the result of this contraction is then appended to the + end of the contraction list. This list can then be iterated over until all + intermediate contractions are complete. See Also -------- :obj:`dpnp.einsum` : Evaluates the Einstein summation convention on the operands. + :obj:`dpnp.linalg.multi_dot` : Chained dot product. :obj:`dpnp.dot` : Returns the dot product of two arrays. :obj:`dpnp.inner` : Returns the inner product of two arrays. :obj:`dpnp.outer` : Returns the outer product of two arrays. + Examples + -------- + We can begin with a chain dot example. In this case, it is optimal to + contract the ``b`` and ``c`` tensors first as represented by the first + element of the path ``(1, 2)``. The resulting tensor is added to the end + of the contraction and the remaining contraction ``(0, 1)`` is then + completed. + + >>> import dpnp as np + >>> np.random.seed(123) + >>> a = np.random.rand(2, 2) + >>> b = np.random.rand(2, 5) + >>> c = np.random.rand(5, 2) + >>> path_info = np.einsum_path("ij,jk,kl->il", a, b, c, optimize="greedy") + + >>> print(path_info[0]) + ['einsum_path', (1, 2), (0, 1)] + + >>> print(path_info[1]) + Complete contraction: ij,jk,kl->il # may vary + Naive scaling: 4 + Optimized scaling: 3 + Naive FLOP count: 1.200e+02 + Optimized FLOP count: 5.700e+01 + Theoretical speedup: 2.105 + Largest intermediate: 4.000e+00 elements + ------------------------------------------------------------------------- + scaling current remaining + ------------------------------------------------------------------------- + 3 kl,jk->jl ij,jl->il + 3 jl,ij->il il->il + + A more complex index transformation example. + + >>> I = np.random.rand(10, 10, 10, 10) + >>> C = np.random.rand(10, 10) + >>> path_info = np.einsum_path( + "ea,fb,abcd,gc,hd->efgh", C, C, I, C, C, optimize="greedy" + ) + >>> print(path_info[0]) + ['einsum_path', (0, 2), (0, 3), (0, 2), (0, 1)] + >>> print(path_info[1]) + Complete contraction: ea,fb,abcd,gc,hd->efgh # may vary + Naive scaling: 8 + Optimized scaling: 5 + Naive FLOP count: 5.000e+08 + Optimized FLOP count: 8.000e+05 + Theoretical speedup: 624.999 + Largest intermediate: 1.000e+04 elements + -------------------------------------------------------------------------- + scaling current remaining + -------------------------------------------------------------------------- + 5 abcd,ea->bcde fb,gc,hd,bcde->efgh + 5 bcde,fb->cdef gc,hd,cdef->efgh + 5 cdef,gc->defg hd,defg->efgh + 5 defg,hd->efgh efgh->efgh + """ - return call_origin(numpy.einsum_path, *args, **kwargs) + return numpy.einsum_path( + *operands, + optimize=optimize, + einsum_call=einsum_call, + ) def inner(a, b): @@ -408,7 +742,7 @@ def matmul( Type to use in computing the matrix product. By default, the returned array will have data type that is determined by considering Promotion Type Rule and device capabilities. - casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional Controls what kind of data casting may occur. Default: ``"same_kind"``. order : {"C", "F", "A", "K", None}, optional Memory layout of the newly output array, if parameter `out` is ``None``. diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 3bab6b5363aa..68f5b5d34ae4 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -145,7 +145,7 @@ def _append_to_diff_array(a, axis, combined, values): """ - dpnp.check_supported_arrays_type(values, scalar_type=True) + dpnp.check_supported_arrays_type(values, scalar_type=True, all_scalars=True) if dpnp.isscalar(values): values = dpnp.asarray( values, sycl_queue=a.sycl_queue, usm_type=a.usm_type diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py index 080c3c52b3c8..92ea44e6c925 100644 --- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py +++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py @@ -23,6 +23,11 @@ # THE POSSIBILITY OF SUCH DAMAGE. # ***************************************************************************** +import copy +import itertools +import operator +import warnings + import dpctl import dpctl.tensor as dpt import dpctl.tensor._tensor_elementwise_impl as tei @@ -35,7 +40,72 @@ from dpnp.dpnp_array import dpnp_array from dpnp.dpnp_utils import get_usm_allocations -__all__ = ["dpnp_cross", "dpnp_dot", "dpnp_kron", "dpnp_matmul"] +_einsum_symbols = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + + +__all__ = ["dpnp_cross", "dpnp_dot", "dpnp_einsum", "dpnp_kron", "dpnp_matmul"] + + +def _calc_offset(shape, linear_id, strides): + """ + Calculate the offset in a multi-dimensional array given the shape, linear_id, and strides. + + Parameters + ---------- + shape : tuple + The shape of the multi-dimensional array. + linear_id : int + The linear index in the multi-dimensional array. + strides : tuple + The strides of the multi-dimensional array. + + Returns + ------- + out : int + The offset in the multi-dimensional array. + + """ + + offset = 0 + indices = _index_linear_to_tuple(shape, linear_id) + for i in range(len(indices)): + offset += indices[i] * strides[i] + return offset + + +def _chr(label): + """ + Copied from _chr in cupy/core/_einsum.py + + Converts an integer label to a character representation. + + Parameters + ---------- + label : int + The integer label to be converted. + + Returns + ------- + out : str + A string representation of the label. If the label is negative, + it returns a string in the format '...[label]', where label is the + negative integer. Otherwise, it returns a single character string + representing the label. + + Examples + -------- + >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> np_util._chr(97) + 'a' + >>> np_util._chr(-1) + '...[-1]' + + """ + + if label < 0: + return f"...[{label}]" + else: + return chr(label) def _create_result_array(x1, x2, out, shape, dtype, usm_type, sycl_queue): @@ -73,6 +143,61 @@ def _create_result_array(x1, x2, out, shape, dtype, usm_type, sycl_queue): ) +def _compute_size_by_dict(indices, idx_dict): + """ + Copied from _compute_size_by_dict in numpy/core/einsumfunc.py + + Computes the product of the elements in `indices` based on the dictionary + `idx_dict`. + + Parameters + ---------- + indices : iterable + Indices to base the product on. + idx_dict : dictionary + Dictionary of index sizes + + Returns + ------- + ret : int + The resulting product. + + Examples + -------- + >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> np_util._compute_size_by_dict("abbc", {"a": 2, "b":3, "c":5}) + 90 + + """ + ret = 1 + for i in indices: + ret *= idx_dict[i] + return ret + + +def _compute_size(start, shape): + """ + Compute the total size of a multi-dimensional array starting from a given index. + + Parameters + ---------- + start : int + The starting index from which to compute the size. + shape : tuple + The shape of the multi-dimensional array. + + Returns + ------- + out : int + The total size of the array. + + """ + ret = 1 + for i in range(start, len(shape)): + ret *= shape[i] + return ret + + def _copy_array(x, dep_events, host_events, contig_copy=False, dtype=None): """ Creating a copy of input array if needed. @@ -103,6 +228,267 @@ def _copy_array(x, dep_events, host_events, contig_copy=False, dtype=None): return x +def _einsum_diagonals(input_subscripts, operands): + """ + Adopted from _einsum_diagonals in cupy/core/_einsum.py + + Compute the diagonal for each operand. + + Parameters + ---------- + input_subscripts : tuple or list of str + Strings representing the Einstein summation notation for each operand. + operands : tuple or list of dpnp.ndarray or usm_ndarray + Input arrays. + + Raises + ------ + ValueError + If dimensions in the operands for collapsing indices don't match. + + Notes + ----- + This function mutates `input_subscripts` and `operands`. + + Examples + -------- + >>> import dpnp as np + >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> a = np.arange(9).reshape(3, 3) + >>> input_subscripts = ["ii"] + >>> operands = [a] + >>> np_util._einsum_diagonals(input_subscripts, operands) + >>> input_subscripts + [['i']] + >>> operands + [array([0, 4, 8])] + + """ + + for idx in range(len(input_subscripts)): + sub = input_subscripts[idx] + arr = operands[idx] + + # repetitive index in the input_subscripts + if len(set(sub)) < len(sub): + axeses = {} + for axis, label in enumerate(sub): + axeses.setdefault(label, []).append(axis) + + axeses = list(axeses.items()) + for label, axes in axeses: + dims = {arr.shape[axis] for axis in axes} + if len(dims) >= 2: + dim1 = dims.pop() + dim0 = dims.pop() + raise ValueError( + f"dimensions in operand {idx} " + f"for collapsing index '{label}' don't match ({dim0} != {dim1})" + ) + + sub, axeses = zip(*axeses) # axeses is not empty + input_subscripts[idx] = list(sub) + operands[idx] = _transpose_ex(arr, axeses) + + +def _expand_dims_transpose(arr, mode, mode_out): + """ + Copied from _expand_dims_transpose in cupy/core/_einsum.py + + Return a reshaped and transposed array. + + The input array `arr` having `mode` as its modes is reshaped and + transposed so that modes of the output becomes `mode_out`. + + Parameters + ---------- + arr : {dpnp.ndarray, usm_ndarray} + Input array. + mode : tuple or list + The modes of input array. + mode_out : tuple or list + The modes of output array. + + Returns + ------- + out : dpnp.ndarray + The reshaped and transposed array. + + Example + ------- + >>> import dpnp + >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> a = dpnp.zeros((10, 20)) + >>> mode_a = ("A", "B") + >>> mode_out = ("B", "C", "A") + >>> out = np_util._expand_dims_transpose(a, mode_a, mode_out) + >>> out.shape + (20, 1, 10) + + """ + mode = list(mode) + shape = list(arr.shape) + axes = [] + for i in mode_out: + if i not in mode: + mode.append(i) + shape.append(1) + axes.append(mode.index(i)) + return dpnp.transpose(arr.reshape(shape), axes) + + +def _find_contraction(positions, input_sets, output_set): + """ + Copied from _find_contraction in numpy/core/einsumfunc.py + + Finds the contraction for a given set of input and output sets. + + Parameters + ---------- + positions : iterable + Integer positions of terms used in the contraction. + input_sets : list of sets + List of sets that represent the lhs side of the einsum subscript + output_set : set + Set that represents the rhs side of the overall einsum subscript + + Returns + ------- + new_result : set + The indices of the resulting contraction + remaining : list of sets + List of sets that have not been contracted, the new set is appended to + the end of this list + idx_removed : set + Indices removed from the entire contraction + idx_contraction : set + The indices used in the current contraction + + Examples + -------- + # A simple dot product test case + >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> pos = (0, 1) + >>> isets = [set("ab"), set("bc")] + >>> oset = set("ac") + >>> np_util._find_contraction(pos, isets, oset) + ({'a', 'c'}, [{'a', 'c'}], {'b'}, {'a', 'b', 'c'}) + + # A more complex case with additional terms in the contraction + >>> pos = (0, 2) + >>> isets = [set("abd"), set("ac"), set("bdc")] + >>> oset = set("ac") + >>> np_util._find_contraction(pos, isets, oset) + ({'a', 'c'}, [{'a', 'c'}, {'a', 'c'}], {'b', 'd'}, {'a', 'b', 'c', 'd'}) + + """ + + idx_contract = set() + idx_remain = output_set.copy() + remaining = [] + for ind, value in enumerate(input_sets): + if ind in positions: + idx_contract |= value + else: + remaining.append(value) + idx_remain |= value + + new_result = idx_remain & idx_contract + idx_removed = idx_contract - new_result + remaining.append(new_result) + + return (new_result, remaining, idx_removed, idx_contract) + + +def _flatten_transpose(a, axeses): + """ + Copied from _flatten_transpose in cupy/core/_einsum.py + + Transpose and flatten each + + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + Input array. + axeses : sequence of sequences of ints + Axeses + + Returns + ------- + out : dpnp.ndarray + `a` with its axes permutated and flatten. + shapes : tuple + flattened shapes. + + Examples + -------- + >>> import dpnp as np + >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> a = np.arange(24).reshape(2, 3, 4) + >>> axeses = [(0, 2), (1,)] + >>> out, shapes = np_util._flatten_transpose(a, axeses) + >>> out.shape + (8, 3) + >>> shapes + [(2, 4), (3,)] + + """ + + transpose_axes = [] + shapes = [] + for axes in axeses: + transpose_axes.extend(axes) + shapes.append([a.shape[axis] for axis in axes]) + + return ( + dpnp.transpose(a, transpose_axes).reshape( + tuple([int(numpy.prod(shape)) for shape in shapes]) + ), + shapes, + ) + + +def _flop_count(idx_contraction, inner, num_terms, size_dictionary): + """ + Copied from _flop_count in numpy/core/einsumfunc.py + + Computes the number of FLOPS in the contraction. + + Parameters + ---------- + idx_contraction : iterable + The indices involved in the contraction + inner : bool + Does this contraction require an inner product? + num_terms : int + The number of terms in a contraction + size_dictionary : dict + The size of each of the indices in idx_contraction + + Returns + ------- + flop_count : int + The total number of FLOPS required for the contraction. + + Examples + -------- + >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> np_util._flop_count("abc", False, 1, {"a": 2, "b":3, "c":5}) + 30 + + >>> np_util._flop_count("abc", True, 2, {"a": 2, "b":3, "c":5}) + 60 + + """ + + overall_size = _compute_size_by_dict(idx_contraction, size_dictionary) + op_factor = max(1, num_terms - 1) + if inner: + op_factor += 1 + + return overall_size * op_factor + + def _gemm_batch_matmul(exec_q, x1, x2, res, x1_is_2D, x2_is_2D, dev_tasks_list): # If input array is F-contiguous, we need to change the order to C-contiguous. # because mkl::gemm_bacth needs each 2D array to be F-contiguous but @@ -153,6 +539,199 @@ def _gemm_batch_matmul(exec_q, x1, x2, res, x1_is_2D, x2_is_2D, dev_tasks_list): return ht_blas_ev, ht_tasks_list, res +def _greedy_path(input_sets, output_set, idx_dict, memory_limit): + """ + Copied from _greedy_path in numpy/core/einsumfunc.py + + Finds the path by contracting the best pair until the input list is + exhausted. The best pair is found by minimizing the tuple + ``(-prod(indices_removed), cost)``. What this amounts to is prioritizing + matrix multiplication or inner product operations, then Hadamard like + operations, and finally outer operations. Outer products are limited by + ``memory_limit``. This algorithm scales cubically with respect to the + number of elements in the list ``input_sets``. + + Parameters + ---------- + input_sets : list of sets + List of sets that represent the lhs side of the einsum subscript + output_set : set + Set that represents the rhs side of the overall einsum subscript + idx_dict : dictionary + Dictionary of index sizes + memory_limit : int + The maximum number of elements in a temporary array + + Returns + ------- + path : list + The greedy contraction order within the memory limit constraint. + + Examples + -------- + >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> isets = [set("abd"), set("ac"), set("bdc")] + >>> oset = set("") + >>> idx_sizes = {"a": 1, "b":2, "c":3, "d":4} + >>> _greedy_path(isets, oset, idx_sizes, 5000) + [(0, 2), (0, 1)] + + """ + + # Handle trivial cases that leaked through + if len(input_sets) == 1: + return [(0,)] + elif len(input_sets) == 2: + return [(0, 1)] + + # Build up a naive cost + _, _, idx_removed, idx_contract = _find_contraction( + range(len(input_sets)), input_sets, output_set + ) + naive_cost = _flop_count( + idx_contract, idx_removed, len(input_sets), idx_dict + ) + + # Initially iterate over all pairs + comb_iter = itertools.combinations(range(len(input_sets)), 2) + known_contractions = [] + + path_cost = 0 + path = [] + for _ in range(len(input_sets) - 1): + # Iterate over all pairs on first step, only previously found pairs on subsequent steps + for positions in comb_iter: + # Always initially ignore outer products + if input_sets[positions[0]].isdisjoint(input_sets[positions[1]]): + continue + + result = _parse_possible_contraction( + positions, + input_sets, + output_set, + idx_dict, + memory_limit, + path_cost, + naive_cost, + ) + if result is not None: + known_contractions.append(result) + + # If we do not have a inner contraction, rescan pairs including outer products + if len(known_contractions) == 0: + # Then check the outer products + for positions in itertools.combinations(range(len(input_sets)), 2): + result = _parse_possible_contraction( + positions, + input_sets, + output_set, + idx_dict, + memory_limit, + path_cost, + naive_cost, + ) + if result is not None: + known_contractions.append(result) + + # If we still did not find any remaining contractions, default back to einsum like behavior + if len(known_contractions) == 0: + path.append(tuple(range(len(input_sets)))) + break + + # Sort based on first index + best = min(known_contractions, key=lambda x: x[0]) + + # Now propagate as many unused contractions as possible to next iteration + known_contractions = _update_other_results(known_contractions, best) + + # Next iteration only compute contractions with the new tensor + # All other contractions have been accounted for + input_sets = best[2] + new_tensor_pos = len(input_sets) - 1 + comb_iter = ((i, new_tensor_pos) for i in range(new_tensor_pos)) + + # Update path and total cost + path.append(best[1]) + path_cost += best[0][1] + + return path + + +def _iter_path_pairs(path): + """ + Copied from _iter_path_pairs in cupy/core/_einsum.py + + Decompose path into binary path + + Parameters + ---------- + path : sequence of tuples of ints + + Yields + ------ + tuple of ints + pair (idx0, idx1) that represents the operation + {pop(idx0); pop(idx1); append();} + + """ + + for indices in path: + assert all(idx >= 0 for idx in indices) + # [3, 1, 4, 9] -> [(9, 4), (-1, 3), (-1, 1)] + if len(indices) >= 2: + indices = sorted(indices, reverse=True) + yield indices[0], indices[1] + for idx in indices[2:]: + yield -1, idx + + +def _index_linear_to_tuple(shape, linear_id): + """ + Convert a linear index to a tuple of indices in a multi-dimensional array. + + Parameters + ---------- + shape : tuple + The shape of the multi-dimensional array. + linear_id : int + The linear index to convert. + + Returns + ------- + out: tuple + A tuple of indices corresponding to the linear index. + + """ + + len_shape = len(shape) + indices = [0] * len_shape + for i in range(len_shape): + prod_res = _compute_size(i + 1, shape) + indices[i] = linear_id // prod_res + linear_id %= prod_res + + return tuple(indices) + + +def _make_transpose_axes(sub, b_dims, c_dims): + """Copied from _make_transpose_axes in cupy/core/_einsum.py""" + bs = [] + cs = [] + ts = [] + for axis, label in enumerate(sub): + if label in b_dims: + bs.append((label, axis)) + elif label in c_dims: + cs.append((label, axis)) + else: + ts.append((label, axis)) + return ( + _tuple_sorted_by_0(bs), + _tuple_sorted_by_0(cs), + _tuple_sorted_by_0(ts), + ) + + def _op_res_dtype(*arrays, dtype, casting, sycl_queue): """ _op_res_dtype(*arrays, dtype, casting, sycl_queue) @@ -176,7 +755,7 @@ def _op_res_dtype(*arrays, dtype, casting, sycl_queue): Input arrays. dtype : dtype If not ``None``, data type of the output array. - casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional Controls what kind of data casting may occur. sycl_queue : {SyclQueue} A SYCL queue to use for determining default floating point datat type. @@ -210,6 +789,336 @@ def _op_res_dtype(*arrays, dtype, casting, sycl_queue): return op_dtype, res_dtype +def _optimal_path(input_sets, output_set, idx_dict, memory_limit): + """ + Copied from _optimal_path in numpy/core/einsumfunc.py + + Computes all possible pair contractions, sieves the results based + on ``memory_limit`` and returns the lowest cost path. This algorithm + scales factorial with respect to the elements in the list ``input_sets``. + + Parameters + ---------- + input_sets : list of sets + List of sets that represent the lhs side of the einsum subscript + output_set : set + Set that represents the rhs side of the overall einsum subscript + idx_dict : dictionary + Dictionary of index sizes + memory_limit : int + The maximum number of elements in a temporary array + + Returns + ------- + path : list + The optimal contraction order within the memory limit constraint. + + Examples + -------- + >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> isets = [set("abd"), set("ac"), set("bdc")] + >>> oset = set("") + >>> idx_sizes = {"a": 1, "b":2, "c":3, "d":4} + >>> np_util._optimal_path(isets, oset, idx_sizes, 5000) + [(0, 2), (0, 1)] + + """ + + full_results = [(0, [], input_sets)] + for iteration in range(len(input_sets) - 1): + iter_results = [] + + # Compute all unique pairs + for curr in full_results: + cost, positions, remaining = curr + for con in itertools.combinations( + range(len(input_sets) - iteration), 2 + ): + # Find the contraction + cont = _find_contraction(con, remaining, output_set) + new_result, new_input_sets, idx_removed, idx_contract = cont + + # Sieve the results based on memory_limit + new_size = _compute_size_by_dict(new_result, idx_dict) + if new_size > memory_limit: + continue + + # Build (total_cost, positions, indices_remaining) + total_cost = cost + _flop_count( + idx_contract, idx_removed, len(con), idx_dict + ) + new_pos = positions + [con] + iter_results.append((total_cost, new_pos, new_input_sets)) + + # Update combinatorial list, if we did not find anything return best + # path + remaining contractions + if iter_results: + full_results = iter_results + else: + path = min(full_results, key=lambda x: x[0])[1] + path += [tuple(range(len(input_sets) - iteration))] + return path + + path = min(full_results, key=lambda x: x[0])[1] + return path + + +def _parse_einsum_input(args): + """ + Copied from _parse_einsum_input in cupy/core/_einsum.py + + Parse einsum operands. + + Parameters + ---------- + args : tuple + The non-keyword arguments to einsum. + + Returns + ------- + input_strings : str + Parsed input strings. + output_string : str + Parsed output string. + operands : list of array_like + The operands to use in the contraction. + + Examples + -------- + The operand list is simplified to reduce printing: + + >>> import dpnp as np + >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util + >>> a = np.random.rand(4, 4) + >>> b = np.random.rand(4, 4, 4) + >>> np_util._parse_einsum_input(("...a,...a->...", a, b)) + (['@a', '@a'], '@', [a, b]) + + >>> np_util._parse_einsum_input((a, [Ellipsis, 0], b, [Ellipsis, 0])) + (['@a', '@b'], None, [a, b]) + + """ + + if len(args) == 0: + raise ValueError( + "must specify the einstein sum subscripts string and at least one " + "operand, or at least one operand and its corresponding " + "subscripts list" + ) + + if isinstance(args[0], str): + subscripts = args[0].replace(" ", "") + operands = list(args[1:]) + + # Ensure all characters are valid + for s in subscripts: + if s in ".,->": + continue + if s not in _einsum_symbols: + raise ValueError( + f"invalid subscript '{s}' in einstein sum subscripts " + "string, subscripts must be letters" + ) + + # Parse "..." + subscripts = subscripts.replace("...", "@") + if "." in subscripts: + raise ValueError( + "einstein sum subscripts string contains a '.' that is not " + "part of an ellipsis ('...')" + ) + + # Parse "->" + if ("-" in subscripts) or (">" in subscripts): + # Check for proper "->" + invalid = subscripts.count("-") > 1 or subscripts.count(">") > 1 + subscripts = subscripts.split("->") + if invalid or len(subscripts) != 2: + raise ValueError( + "einstein sum subscript string does not contain proper " + "'->' output specified" + ) + input_subscripts, output_subscript = subscripts + + else: + input_subscripts = subscripts + output_subscript = None + + input_subscripts = input_subscripts.split(",") + if len(input_subscripts) != len(operands): + msg = "more" if len(operands) > len(input_subscripts) else "fewer" + raise ValueError( + msg + " operands provided to einstein sum function than " + "specified in the subscripts string" + ) + else: + args = list(args) + operands = [] + input_subscripts = [] + while len(args) >= 2: + operands.append(args.pop(0)) + input_subscripts.append(_parse_int_subscript(args.pop(0))) + if args: + output_subscript = _parse_int_subscript(args[0]) + else: + output_subscript = None + + return input_subscripts, output_subscript, operands + + +def _parse_ellipsis_subscript(subscript, idx, ndim=None, ellipsis_len=None): + """ + Copied from _parse_ellipsis_subscript in cupy/core/_einsum.py + + Parse a subscript that may contain ellipsis + + Parameters + ---------- + subscript : str + An einsum subscript of an operand or an output. "..." + should be replaced by "@". + idx : {int, ``None``} + For error messages, give int idx for the idx-th operand or ``None`` + for the output. + ndim : int, optional + ndim of the operand + ellipsis_len : int, optional + number of broadcast dimensions of the output. + + Returns + ------- + out : list of ints + The parsed subscript + + """ + subs = subscript.split("@") + if len(subs) == 1: + (sub,) = subs + if ndim is not None and len(sub) != ndim: + if len(sub) > ndim: + raise ValueError( + f"einstein sum subscripts string {sub} contains too many " + f"subscripts for operand {idx}" + ) + raise ValueError( + f"operand {idx} has more dimensions than subscripts string " + f"{sub} given in einstein sum, but no '...' ellipsis " + "provided to broadcast the extra dimensions." + ) + return [ord(label) for label in sub] + elif len(subs) == 2: + left_sub, right_sub = subs + if ndim is not None: + ellipsis_len = ndim - (len(left_sub) + len(right_sub)) + if ellipsis_len < 0: + raise ValueError( + f"einstein sum subscripts string {left_sub}...{right_sub} " + f"contains too many subscripts for operand {idx}" + ) + ret = [] + ret.extend(ord(label) for label in left_sub) + ret.extend(range(-ellipsis_len, 0)) + ret.extend(ord(label) for label in right_sub) + return ret + else: + # >= 2 ellipses for an operand + raise ValueError( + "einstein sum subscripts string contains a '.' that is not " + "part of an ellipsis ('...') " + + ("in the output" if idx is None else f"for operand {idx}") + ) + + +def _parse_int_subscript(list_subscript): + """Copied from _parse_int_subscript in cupy/core/_einsum.py""" + + str_subscript = "" + for s in list_subscript: + if s is Ellipsis: + str_subscript += "@" + else: + try: + s = operator.index(s) + except TypeError as e: + raise TypeError( + "For this input type lists must contain " + "either int or Ellipsis" + ) from e + str_subscript += _einsum_symbols[s] + return str_subscript + + +def _parse_possible_contraction( + positions, + input_sets, + output_set, + idx_dict, + memory_limit, + path_cost, + naive_cost, +): + """ + Copied from _parse_possible_contraction in numpy/core/einsumfunc.py + + Compute the cost (removed size + flops) and resultant indices for + performing the contraction specified by ``positions``. + + Parameters + ---------- + positions : tuple of int + The locations of the proposed tensors to contract. + input_sets : list of sets + The indices found on each tensors. + output_set : set + The output indices of the expression. + idx_dict : dict + Mapping of each index to its size. + memory_limit : int + The total allowed size for an intermediary tensor. + path_cost : int + The contraction cost so far. + naive_cost : int + The cost of the unoptimized expression. + + Returns + ------- + cost : (int, int) + A tuple containing the size of any indices removed, and the flop cost. + positions : tuple of int + The locations of the proposed tensors to contract. + new_input_sets : list of sets + The resulting new list of indices if this proposed contraction is performed. + + """ + + # Find the contraction + contract = _find_contraction(positions, input_sets, output_set) + idx_result, new_input_sets, idx_removed, idx_contract = contract + + # Sieve the results based on memory_limit + new_size = _compute_size_by_dict(idx_result, idx_dict) + if new_size > memory_limit: + return None + + # Build sort tuple + old_sizes = ( + _compute_size_by_dict(input_sets[p], idx_dict) for p in positions + ) + removed_size = sum(old_sizes) - new_size + + # NB: removed_size used to be just the size of any removed indices i.e.: + # helpers.compute_size_by_dict(idx_removed, idx_dict) + cost = _flop_count(idx_contract, idx_removed, len(positions), idx_dict) + sort = (-removed_size, cost) + + # Sieve based on total cost as well + if (path_cost + cost) >= naive_cost: + return None + + # Add contraction to possible choices + return [sort, positions, new_input_sets] + + def _shape_error(a, b, core_dim, err_msg): if err_msg == 0: raise ValueError( @@ -235,6 +1144,50 @@ def _shape_error(a, b, core_dim, err_msg): ) +def _reduced_binary_einsum(arr0, sub0, arr1, sub1, sub_others): + """Copied from _reduced_binary_einsum in cupy/core/_einsum.py""" + + set0 = set(sub0) + set1 = set(sub1) + assert len(set0) == len(sub0), "operand 0 should be reduced: diagonal" + assert len(set1) == len(sub1), "operand 1 should be reduced: diagonal" + + if len(sub0) == 0 or len(sub1) == 0: + return arr0 * arr1, sub0 + sub1 + + set_others = set(sub_others) + shared = set0 & set1 + batch_dims = shared & set_others + contract_dims = shared - batch_dims + + bs0, cs0, ts0 = _make_transpose_axes(sub0, batch_dims, contract_dims) + bs1, cs1, ts1 = _make_transpose_axes(sub1, batch_dims, contract_dims) + + sub_b = [sub0[axis] for axis in bs0] + assert sub_b == [sub1[axis] for axis in bs1] + sub_l = [sub0[axis] for axis in ts0] + sub_r = [sub1[axis] for axis in ts1] + + sub_out = sub_b + sub_l + sub_r + assert set(sub_out) <= set_others, "operands should be reduced: unary sum" + + if len(contract_dims) == 0: + # Use element-wise multiply when no contraction is needed + if len(sub_out) == len(sub_others): + # to assure final output of einsum is C-contiguous + sub_out = sub_others + arr0 = _expand_dims_transpose(arr0, sub0, sub_out) + arr1 = _expand_dims_transpose(arr1, sub1, sub_out) + return arr0 * arr1, sub_out + + tmp0, shapes0 = _flatten_transpose(arr0, [bs0, ts0, cs0]) + tmp1, shapes1 = _flatten_transpose(arr1, [bs1, cs1, ts1]) + shapes_out = shapes0[0] + shapes0[1] + shapes1[2] + assert shapes0[0] == shapes1[0] + arr_out = dpnp.matmul(tmp0, tmp1).reshape(shapes_out) + return arr_out, sub_out + + def _standardize_strides(strides, inherently_2D, shape, ndim): """ Standardizing the strides. @@ -266,6 +1219,85 @@ def _standardize_strides(strides, inherently_2D, shape, ndim): return stndrd_strides +def _tuple_sorted_by_0(zs): + """Copied from _tuple_sorted_by_0 in cupy/core/_einsum.py""" + return tuple(i for _, i in sorted(zs)) + + +def _transpose_ex(a, axeses): + """ + Copied from _transpose_ex in cupy/core/_einsum.py + + Transpose and diagonal + + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + Input array. + axeses : sequence of sequences of ints + Axeses + + Returns + ------- + out : dpnp.ndarray + `a` with its axes permutated. A writeable view is returned + whenever possible. + """ + + shape = [] + strides = [] + for axes in axeses: + shape.append(a.shape[axes[0]] if axes else 1) + stride = sum(a.strides[axis] for axis in axes) + strides.append(stride) + + # TODO: replace with a.view() when it is implemented in dpnp + a = _view_work_around(a, shape, strides) + return a + + +def _update_other_results(results, best): + """ + Copied from _update_other_results in numpy/core/einsumfunc.py + + Update the positions and provisional input_sets of ``results`` based on + performing the contraction result ``best``. Remove any involving the tensors + contracted. + + Parameters + ---------- + results : list + List of contraction results produced by ``_parse_possible_contraction``. + best : list + The best contraction of ``results`` i.e. the one that will be performed. + + Returns + ------- + mod_results : list + The list of modified results, updated with outcome of ``best`` contraction. + """ + + best_con = best[1] + bx, by = best_con + mod_results = [] + + for cost, (x, y), con_sets in results: + # Ignore results involving tensors just contracted + if x in best_con or y in best_con: + continue + + # Update the input_sets + del con_sets[by - int(by > x) - int(by > y)] + del con_sets[bx - int(bx > x) - int(bx > y)] + con_sets.insert(-1, best[2][-1]) + + # Update the position indices + mod_con = x - int(x > bx) - int(x > by), y - int(y > bx) - int(y > by) + mod_results.append((cost, mod_con, con_sets)) + + return mod_results + + def _validate_axes(x1, x2, axes): """Check axes is valid for matmul function.""" @@ -321,6 +1353,39 @@ def _validate_internal(axes, i, ndim): return axes +def _view_work_around(a, shape, strides): + """ + Create a copy of the input array with the specified shape and strides. + + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + The input array. + shape : tuple + The desired shape of the output array. + strides : tuple + The desired strides of the output array. + + Returns + ------- + out : dpnp.ndarray + A copy of the input array with the specified shape and strides. + + """ + + n_size = numpy.prod(shape) + b = dpnp.empty( + n_size, dtype=a.dtype, usm_type=a.usm_type, sycl_queue=a.sycl_queue + ) + for linear_id in range(n_size): + offset = _calc_offset(shape, linear_id, strides) + indices = _index_linear_to_tuple(a.shape, offset) + b[linear_id] = a[indices] + b = b.reshape(tuple(shape)) + + return b + + def dpnp_cross(a, b, cp, exec_q): """Return the cross product of two (arrays of) vectors.""" @@ -476,6 +1541,217 @@ def dpnp_cross(a, b, cp, exec_q): return cp +def dpnp_einsum( + *operands, out=None, dtype=None, order="K", casting="safe", optimize=False +): + """Evaluates the Einstein summation convention on the operands.""" + + input_subscripts, output_subscript, operands = _parse_einsum_input(operands) + assert isinstance(input_subscripts, list) + assert isinstance(operands, list) + + dpnp.check_supported_arrays_type(*operands, scalar_type=True) + arrays = [] + for a in operands: + if dpnp.is_supported_array_type(a): + arrays.append(a) + + res_usm_type, exec_q = get_usm_allocations(arrays) + result_dtype = dpnp.result_type(*arrays) if dtype is None else dtype + for id, a in enumerate(operands): + if dpnp.isscalar(a): + operands[id] = dpnp.array( + a, dtype=result_dtype, usm_type=res_usm_type, sycl_queue=exec_q + ) + + input_subscripts = [ + _parse_ellipsis_subscript(sub, idx, ndim=arr.ndim) + for idx, (sub, arr) in enumerate(zip(input_subscripts, operands)) + ] + + # Get length of each unique dimension and ensure all dimensions are correct + dimension_dict = {} + for idx, sub in enumerate(input_subscripts): + sh = operands[idx].shape + for axis, label in enumerate(sub): + dim = sh[axis] + if label in dimension_dict.keys(): + # For broadcasting cases we always want the largest dim size + if dimension_dict[label] == 1: + dimension_dict[label] = dim + elif dim not in (1, dimension_dict[label]): + dim_old = dimension_dict[label] + raise ValueError( + f"Size of label '{_chr(label)}' for operand {idx} ({dim}) " + f"does not match previous terms ({dim_old})." + ) + else: + dimension_dict[label] = dim + + if output_subscript is None: + # Build output subscripts + tmp_subscripts = list(itertools.chain.from_iterable(input_subscripts)) + output_subscript = [ + label + for label in sorted(set(tmp_subscripts)) + if label < 0 or tmp_subscripts.count(label) == 1 + ] + else: + if "@" not in output_subscript and -1 in dimension_dict: + raise ValueError( + "output has more dimensions than subscripts " + "given in einstein sum, but no '...' ellipsis " + "provided to broadcast the extra dimensions." + ) + output_subscript = _parse_ellipsis_subscript( + output_subscript, + None, + ellipsis_len=sum(label < 0 for label in dimension_dict.keys()), + ) + + # Make sure output subscripts are in the input + tmp_subscripts = set(itertools.chain.from_iterable(input_subscripts)) + for label in output_subscript: + if label not in tmp_subscripts: + raise ValueError( + "einstein sum subscripts string included output subscript " + f"'{_chr(label)}' which never appeared in an input." + ) + if len(output_subscript) != len(set(output_subscript)): + for label in output_subscript: + if output_subscript.count(label) >= 2: + raise ValueError( + "einstein sum subscripts string includes output " + f"subscript '{_chr(label)}' multiple times." + ) + + _einsum_diagonals(input_subscripts, operands) + + # no more raises + if len(operands) >= 2: + if any(arr.size == 0 for arr in operands): + return dpnp.zeros( + tuple(dimension_dict[label] for label in output_subscript), + dtype=result_dtype, + usm_type=res_usm_type, + sycl_queue=exec_q, + ) + + # Don't squeeze if unary, because this affects later (in trivial sum) + # whether the return is a writeable view. + for idx in range(len(operands)): + arr = operands[idx] + if 1 in arr.shape: + squeeze_indices = [] + sub = [] + for axis, label in enumerate(input_subscripts[idx]): + if arr.shape[axis] == 1: + squeeze_indices.append(axis) + else: + sub.append(label) + input_subscripts[idx] = sub + operands[idx] = dpnp.squeeze(arr, axis=tuple(squeeze_indices)) + assert operands[idx].ndim == len(input_subscripts[idx]) + del arr + + # unary einsum without summation should return a (writeable) view + returns_view = len(operands) == 1 + + # unary sum + for idx, sub in enumerate(input_subscripts): + other_subscripts = copy.copy(input_subscripts) + other_subscripts[idx] = output_subscript + other_subscripts = set(itertools.chain.from_iterable(other_subscripts)) + sum_axes = tuple( + axis + for axis, label in enumerate(sub) + if label not in other_subscripts + ) + if sum_axes: + returns_view = False + input_subscripts[idx] = [ + label for axis, label in enumerate(sub) if axis not in sum_axes + ] + + operands[idx] = operands[idx].sum(axis=sum_axes, dtype=result_dtype) + + if returns_view: + # TODO: replace with a.view() when it is implemented in dpnp + operands = [a for a in operands] + else: + operands = [ + dpnp.astype( + a, result_dtype, copy=False, casting=casting, order=order + ) + for a in operands + ] + + # no more casts + optimize_algorithms = { + "greedy": _greedy_path, + "optimal": _optimal_path, + } + if optimize is False: + path = [tuple(range(len(operands)))] + elif len(optimize) and (optimize[0] == "einsum_path"): + path = optimize[1:] + else: + try: + if len(optimize) == 2 and isinstance(optimize[1], (int, float)): + algo = optimize_algorithms[optimize[0]] + memory_limit = int(optimize[1]) + else: + algo = optimize_algorithms[optimize] + memory_limit = 2**31 + except (TypeError, KeyError): # unhashable type or not found + raise TypeError( + f"Did not understand the path (optimize): {str(optimize)}" + ) + input_sets = [set(sub) for sub in input_subscripts] + output_set = set(output_subscript) + path = algo(input_sets, output_set, dimension_dict, memory_limit) + if any(len(indices) > 2 for indices in path): + warnings.warn( + "memory efficient einsum is not supported yet", + RuntimeWarning, + stacklevel=2, + ) + + for idx0, idx1 in _iter_path_pairs(path): + # "reduced" binary einsum + arr0 = operands.pop(idx0) + sub0 = input_subscripts.pop(idx0) + arr1 = operands.pop(idx1) + sub1 = input_subscripts.pop(idx1) + sub_others = list( + itertools.chain( + output_subscript, + itertools.chain.from_iterable(input_subscripts), + ) + ) + arr_out, sub_out = _reduced_binary_einsum( + arr0, sub0, arr1, sub1, sub_others + ) + operands.append(arr_out) + input_subscripts.append(sub_out) + del arr0, arr1 + + # unary einsum at last + (arr0,) = operands + (sub0,) = input_subscripts + + transpose_axes = [] + for label in output_subscript: + if label in sub0: + transpose_axes.append(sub0.index(label)) + + arr_out = arr0.transpose(transpose_axes).reshape( + [dimension_dict[label] for label in output_subscript] + ) + assert returns_view or arr_out.dtype == result_dtype + return dpnp.get_result_array(arr_out, out) + + def dpnp_kron(a, b, a_ndim, b_ndim): """Returns the kronecker product of two arrays.""" @@ -672,8 +1948,8 @@ def dpnp_matmul( # find the result shape if x1_is_2D and x2_is_2D: - x1 = x1.reshape(x1.shape[-2], x1.shape[-1]) - x2 = x2.reshape(x2.shape[-2], x2.shape[-1]) + x1 = dpnp.reshape(x1, (x1.shape[-2], x1.shape[-1])) + x2 = dpnp.reshape(x2, (x2.shape[-2], x2.shape[-1])) res_shape = (x1.shape[-2], x2.shape[-1]) else: # makes the dimension of input the same by adding new axis diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index b10df5863f72..ed852054e284 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -781,7 +781,9 @@ def matrix_rank(A, tol=None, hermitian=False): dpnp.check_supported_arrays_type(A) if tol is not None: - dpnp.check_supported_arrays_type(tol, scalar_type=True) + dpnp.check_supported_arrays_type( + tol, scalar_type=True, all_scalars=True + ) return dpnp_matrix_rank(A, tol=tol, hermitian=hermitian) @@ -895,7 +897,7 @@ def pinv(a, rcond=1e-15, hermitian=False): """ dpnp.check_supported_arrays_type(a) - dpnp.check_supported_arrays_type(rcond, scalar_type=True) + dpnp.check_supported_arrays_type(rcond, scalar_type=True, all_scalars=True) assert_stacked_2d(a) return dpnp_pinv(a, rcond=rcond, hermitian=hermitian) diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 9ef4091f7482..d6fd82b9e3e3 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -263,29 +263,6 @@ tests/third_party/cupy/indexing_tests/test_iterate.py::TestFlatiterSubscriptInde tests/third_party/cupy/indexing_tests/test_iterate.py::TestFlatiterSubscriptIndexError_param_3_{index=[0], shape=(2, 3, 4)}::test_getitem tests/third_party/cupy/indexing_tests/test_iterate.py::TestFlatiterSubscriptIndexError_param_3_{index=[0], shape=(2, 3, 4)}::test_setitem -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumBinaryOperationWithScalar::test_scalar_1 -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumBinaryOperationWithScalar::test_scalar_2 -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_10_{opt='optimal', subscript='chd,bde,agbc,hiad,bdi,cgh,agdb'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_11_{opt='optimal', subscript='eb,cb,fb->cef'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_12_{opt='optimal', subscript='dd,fb,be,cdb->cef'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_13_{opt='optimal', subscript='bca,cdb,dbf,afc->'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_14_{opt='optimal', subscript='dcc,fce,ea,dbf->ab'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_15_{opt='optimal', subscript='a,ac,ab,ad,cd,bd,bc->'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_1_{opt=('greedy', 0), subscript='acdf,jbje,gihb,hfac'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_2_{opt='greedy', subscript='acdf,jbje,gihb,hfac,gfac,gifabc,hfac'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_3_{opt='greedy', subscript='chd,bde,agbc,hiad,bdi,cgh,agdb'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_4_{opt='greedy', subscript='eb,cb,fb->cef'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_5_{opt='greedy', subscript='dd,fb,be,cdb->cef'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_6_{opt='greedy', subscript='bca,cdb,dbf,afc->'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_7_{opt='greedy', subscript='dcc,fce,ea,dbf->ab'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_8_{opt='greedy', subscript='a,ac,ab,ad,cd,bd,bc->'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_9_{opt='optimal', subscript='acdf,jbje,gihb,hfac,gfac,gifabc,hfac'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_float -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_int -tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_dim_mismatch3 -tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_invalid_sub1 -tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_too_many_dims3 - tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_0_{shapes=[(), ()]}::test_broadcast tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_10_{shapes=[(0, 1, 1, 0, 3), (5, 2, 0, 1, 0, 0, 3), (2, 1, 0, 0, 0, 3)]}::test_broadcast tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_1_{shapes=[(0,), (0,)]}::test_broadcast diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 4688c1787fc1..73087bde65e0 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -87,9 +87,6 @@ tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_3_{order='C', shape=(2, 3)}::test_item tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_4_{order='F', shape=(2, 3)}::test_item -tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_dim_mismatch3 -tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_too_many_dims3 - tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_0_{d=2, shape=(4, 3, 2)}::test_normal tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_1_{d=2, shape=(3, 2)}::test_normal tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_2_{d=4, shape=(4, 3, 2)}::test_normal @@ -333,27 +330,6 @@ tests/third_party/cupy/indexing_tests/test_iterate.py::TestFlatiterSubscriptInde tests/third_party/cupy/indexing_tests/test_iterate.py::TestFlatiterSubscriptIndexError_param_3_{index=[0], shape=(2, 3, 4)}::test_getitem tests/third_party/cupy/indexing_tests/test_iterate.py::TestFlatiterSubscriptIndexError_param_3_{index=[0], shape=(2, 3, 4)}::test_setitem -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumBinaryOperationWithScalar::test_scalar_1 -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumBinaryOperationWithScalar::test_scalar_2 -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_10_{opt='optimal', subscript='chd,bde,agbc,hiad,bdi,cgh,agdb'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_11_{opt='optimal', subscript='eb,cb,fb->cef'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_12_{opt='optimal', subscript='dd,fb,be,cdb->cef'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_13_{opt='optimal', subscript='bca,cdb,dbf,afc->'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_14_{opt='optimal', subscript='dcc,fce,ea,dbf->ab'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_15_{opt='optimal', subscript='a,ac,ab,ad,cd,bd,bc->'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_1_{opt=('greedy', 0), subscript='acdf,jbje,gihb,hfac'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_2_{opt='greedy', subscript='acdf,jbje,gihb,hfac,gfac,gifabc,hfac'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_3_{opt='greedy', subscript='chd,bde,agbc,hiad,bdi,cgh,agdb'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_4_{opt='greedy', subscript='eb,cb,fb->cef'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_5_{opt='greedy', subscript='dd,fb,be,cdb->cef'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_6_{opt='greedy', subscript='bca,cdb,dbf,afc->'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_7_{opt='greedy', subscript='dcc,fce,ea,dbf->ab'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_8_{opt='greedy', subscript='a,ac,ab,ad,cd,bd,bc->'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_9_{opt='optimal', subscript='acdf,jbje,gihb,hfac,gfac,gifabc,hfac'}::test_einsum -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_float -tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_int -tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_invalid_sub1 - tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_0_{shapes=[(), ()]}::test_broadcast tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_1_{shapes=[(0,), (0,)]}::test_broadcast tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_2_{shapes=[(1,), (1,)]}::test_broadcast diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 7e0b99443d61..a45b2826b3ac 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -1,3 +1,5 @@ +from ast import Raise + import dpctl import dpctl.tensor as dpt import numpy @@ -584,6 +586,54 @@ def test_eigenvalue_errors(self, func): assert_raises(ValueError, dpnp_func, a_dp, UPLO="N") +class TestEinsum: + def test_einsum_trivial_cases(self): + a = inp.arange(25).reshape(5, 5) + b = inp.arange(5) + a_np = a.asnumpy() + b_np = b.asnumpy() + + # one input, no optimization is needed + result = inp.einsum("i", b, optimize="greedy") + expected = numpy.einsum("i", b_np, optimize="greedy") + assert_dtype_allclose(result, expected) + + # two inputs, no optimization is needed + result = inp.einsum("ij,jk", a, a, optimize="greedy") + expected = numpy.einsum("ij,jk", a_np, a_np, optimize="greedy") + assert_dtype_allclose(result, expected) + + # no optimization in optimal mode + result = inp.einsum("ij,jk", a, a, optimize=["optimal", 1]) + expected = numpy.einsum("ij,jk", a_np, a_np, optimize=["optimal", 1]) + assert_dtype_allclose(result, expected) + + # naive cost equal or smaller than optimized cost + result = inp.einsum("i,i,i", b, b, b, optimize="greedy") + expected = numpy.einsum("i,i,i", b_np, b_np, b_np, optimize="greedy") + assert_dtype_allclose(result, expected) + + def test_einsum_error(self): + a = inp.ones((5, 5)) + # unknown keyword argument + with pytest.raises(TypeError): + inp.einsum("ii->i", a, copy=False) + + # unknown value for optimize keyword + with pytest.raises(TypeError): + inp.einsum("ii->i", a, optimize="average") + + a = inp.ones((5, 4)) + # different size for same label 5 != 4 + with pytest.raises(ValueError): + inp.einsum("ii", a) + + a = inp.ones((5, 5)) + # repeated scripts in output + with pytest.raises(ValueError): + inp.einsum("ij,jk->ii", a, a) + + class TestInv: @pytest.mark.parametrize( "array", diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 60ea8c0cd031..017f364b6681 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -2554,14 +2554,14 @@ def setup_method(self): def test_matmul(self, order_pair, shape_pair): order1, order2 = order_pair shape1, shape2 = shape_pair - a1 = numpy.arange(numpy.prod(shape1)).reshape(shape1) - a2 = numpy.arange(numpy.prod(shape2)).reshape(shape2) + dtype = dpnp.default_float_type() + a1 = numpy.arange(numpy.prod(shape1), dtype=dtype).reshape(shape1) + a2 = numpy.arange(numpy.prod(shape2), dtype=dtype).reshape(shape2) a1 = numpy.array(a1, order=order1) a2 = numpy.array(a2, order=order2) b1 = dpnp.asarray(a1) b2 = dpnp.asarray(a2) - result = dpnp.matmul(b1, b2) expected = numpy.matmul(a1, a2) assert_dtype_allclose(result, expected) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index dfa2296dbac1..d79ae8b3337d 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -1043,6 +1043,29 @@ def test_multi_dot(device): assert_sycl_queue_equal(result.sycl_queue, exec_q) +@pytest.mark.parametrize( + "device", + valid_devices, + ids=[device.filter_string for device in valid_devices], +) +def test_einsum(device): + numpy_array_list = [] + dpnp_array_list = [] + for _ in range(3): # creat arrays one by one + a = numpy.random.rand(10, 10) + b = dpnp.array(a, device=device) + + numpy_array_list.append(a) + dpnp_array_list.append(b) + + result = dpnp.einsum("ij,jk,kl->il", *dpnp_array_list) + expected = numpy.einsum("ij,jk,kl->il", *numpy_array_list) + assert_dtype_allclose(result, expected) + + _, exec_q = get_usm_allocations(dpnp_array_list) + assert_sycl_queue_equal(result.sycl_queue, exec_q) + + @pytest.mark.parametrize( "device", valid_devices, diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 7923e60455b1..75e70a978bc1 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -717,6 +717,26 @@ def test_multi_dot(usm_type): assert result.usm_type == usm_type +@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +def test_einsum(usm_type): + numpy_array_list = [] + dpnp_array_list = [] + for _ in range(3): # creat arrays one by one + a = numpy.random.rand(10, 10) + b = dp.array(a, usm_type=usm_type) + + numpy_array_list.append(a) + dpnp_array_list.append(b) + + result = dp.einsum("ij,jk,kl->il", *dpnp_array_list) + expected = numpy.einsum("ij,jk,kl->il", *numpy_array_list) + assert_dtype_allclose(result, expected) + + input_usm_type, _ = get_usm_allocations(dpnp_array_list) + assert input_usm_type == usm_type + assert result.usm_type == usm_type + + @pytest.mark.parametrize("func", ["take", "take_along_axis"]) @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) @pytest.mark.parametrize( diff --git a/tests/third_party/cupy/linalg_tests/test_einsum.py b/tests/third_party/cupy/linalg_tests/test_einsum.py index 613102c49c3f..b382ec28f174 100644 --- a/tests/third_party/cupy/linalg_tests/test_einsum.py +++ b/tests/third_party/cupy/linalg_tests/test_einsum.py @@ -1,4 +1,3 @@ -import unittest import warnings import numpy @@ -8,14 +7,60 @@ from tests.helper import has_support_aspect64 from tests.third_party.cupy import testing +rng = numpy.random.default_rng(seed=0) + def _dec_shape(shape, dec): # Test smaller shape return tuple(1 if s == 1 else max(0, s - dec) for s in shape) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -class TestEinSumError(unittest.TestCase): +def _rand1_shape(shape, prob): + # Test broadcast + # If diagonals are "broadcasted" we can simply: + # return tuple(1 if rng.uniform() < prob else s for s in shape) + table = {} + new_shape = [] + for s in shape: + if s not in table: + table[s] = 1 if rng.uniform() < prob else s + new_shape.append(table[s]) + return tuple(new_shape) + + +def augment_einsum_testcases(*params): + """Modify shapes in einsum tests + + Shape parameter should be starts with "shape_". + The original parameter is stored as "_raw_params". + + Args: + params (sequence of dicts) + + Yields: + dict: parameter with modified shapes. + + """ + for dec in range(3): + for drop in [False, True]: + for param in params: + param_new = param.copy() + for k in param.keys(): + if k.startswith("shape_"): + new_shape = _dec_shape(param[k], dec) + if drop: + prob = rng.uniform() + new_shape = _rand1_shape(new_shape, prob) + param_new[k] = new_shape + param_new["_raw_params"] = { + "orig": param, + "dec": dec, + "drop": drop, + } + yield param_new + + +class TestEinSumError: def test_irregular_ellipsis1(self): for xp in (numpy, cupy): with pytest.raises(ValueError): @@ -76,7 +121,8 @@ def test_too_few_operands1(self): def test_too_many_dimension1(self): for xp in (numpy, cupy): with pytest.raises(ValueError): - xp.einsum("i", 0) + # modified since dpnp does not support scalar + xp.einsum("i", xp.array(0)) def test_too_many_dimension2(self): for xp in (numpy, cupy): @@ -108,7 +154,7 @@ def test_invalid_char3(self): with pytest.raises(ValueError): xp.einsum("i->&", xp.array([0, 0])) - # output subscripts must appear in inumpy.t + # output subscripts must appear in input def test_invalid_output_subscripts1(self): for xp in (numpy, cupy): with pytest.raises(ValueError): @@ -120,13 +166,13 @@ def test_invalid_output_subscripts2(self): with pytest.raises(ValueError): xp.einsum("ij->jij", xp.array([[0, 0], [0, 0]])) - # output subscripts must not incrudes comma + # output subscripts must not includes comma def test_invalid_output_subscripts3(self): for xp in (numpy, cupy): with pytest.raises(ValueError): xp.einsum("ij->i,j", xp.array([[0, 0], [0, 0]])) - # dimensions much match when being collapsed + # dimensions must match when being collapsed def test_invalid_diagonal1(self): for xp in (numpy, cupy): with pytest.raises(ValueError): @@ -186,31 +232,28 @@ def test_invalid_arrow4(self): xp.einsum("i-", xp.array([0, 0])) -class TestListArgEinSumError(unittest.TestCase): +class TestListArgEinSumError: + @testing.with_requires("numpy>=1.19") def test_invalid_sub1(self): for xp in (numpy, cupy): - with pytest.raises(ValueError): + with pytest.raises(TypeError): xp.einsum(xp.arange(2), [None]) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_invalid_sub2(self): for xp in (numpy, cupy): with pytest.raises(ValueError): xp.einsum(xp.arange(2), [0], [1]) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_invalid_sub3(self): for xp in (numpy, cupy): with pytest.raises(ValueError): xp.einsum(xp.arange(2), [Ellipsis, 0, Ellipsis]) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_dim_mismatch1(self): for xp in (numpy, cupy): with pytest.raises(ValueError): xp.einsum(xp.arange(2), [0], xp.arange(3), [0]) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_dim_mismatch2(self): for xp in (numpy, cupy): with pytest.raises(ValueError): @@ -221,13 +264,12 @@ def test_dim_mismatch3(self): with pytest.raises(ValueError): xp.einsum(xp.arange(6).reshape(2, 3), [0, 0]) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_too_many_dims1(self): for xp in (numpy, cupy): with pytest.raises(ValueError): - xp.einsum(3, [0]) + # modified since dpnp does not support scalar + xp.einsum(xp.array(3), [0]) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_too_many_dims2(self): for xp in (numpy, cupy): with pytest.raises(ValueError): @@ -239,19 +281,200 @@ def test_too_many_dims3(self): xp.einsum(xp.arange(6).reshape(2, 3), [Ellipsis, 0, 1, 2]) -class TestEinSumUnaryOperationWithScalar(unittest.TestCase): +@pytest.mark.parametrize("do_opt", [False, True]) +class TestListArgEinSum: + # numpy/numpy#15961: should accept int64 type in subscript list + @testing.with_requires("numpy>=1.19") + @testing.numpy_cupy_allclose() + def test_numpy_15961_array(self, xp, do_opt): + n = 3 + a = testing.shaped_arange((n, n), xp) + sub = numpy.array([0, 0]) + return xp.einsum(a, sub, optimize=do_opt) + + @testing.with_requires("numpy>=1.19") + @testing.numpy_cupy_allclose() + def test_numpy_15961_list(self, xp, do_opt): + n = 3 + a = testing.shaped_arange((n, n), xp) + sub = list(numpy.array([0, 0])) + return xp.einsum(a, sub, optimize=do_opt) + + +@testing.parameterize( + *augment_einsum_testcases( + {"shape_a": (2, 3), "subscripts": "ij"}, # do nothing + {"shape_a": (2, 3), "subscripts": "..."}, # do nothing + {"shape_a": (2, 3), "subscripts": "ji"}, # transpose + {"shape_a": (3, 3), "subscripts": "ii->i"}, # diagonal 2d + {"shape_a": (3, 3, 3), "subscripts": "jii->ij"}, # partial diagonal 3d + {"shape_a": (3, 3, 3), "subscripts": "iji->ij"}, # partial diagonal 3d + { + "shape_a": (3, 3, 3), + "subscripts": "...ii->...i", + }, # partial diagonal 3d + {"shape_a": (3, 3, 3), "subscripts": "iii->i"}, # diagonal 3d + {"shape_a": (2, 3, 4), "subscripts": "ijk->jik"}, # swap axes + {"shape_a": (2, 3, 4), "subscripts": "ijk->kij"}, # swap axes + {"shape_a": (2, 3, 4), "subscripts": "ijk->ikj"}, # swap axes + {"shape_a": (2, 3, 4), "subscripts": "kji->ikj"}, # swap axes + {"shape_a": (2, 3, 4), "subscripts": "j...i->i...j"}, # swap axes + {"shape_a": (3,), "subscripts": "i->"}, # sum + {"shape_a": (3, 3), "subscripts": "ii"}, # trace + {"shape_a": (2, 2, 2, 2), "subscripts": "ijkj->kij"}, + {"shape_a": (2, 2, 2, 2), "subscripts": "ijij->ij"}, + {"shape_a": (2, 2, 2, 2), "subscripts": "jiji->ij"}, + {"shape_a": (2, 2, 2, 2), "subscripts": "ii...->..."}, # trace + {"shape_a": (2, 2, 2, 2), "subscripts": "i...i->..."}, # trace + {"shape_a": (2, 2, 2, 2), "subscripts": "...ii->..."}, # trace + {"shape_a": (2, 2, 2, 2), "subscripts": "j...i->..."}, # sum + {"shape_a": (2, 3), "subscripts": "ij->ij..."}, # do nothing + {"shape_a": (2, 3), "subscripts": "ij->i...j"}, # do nothing + {"shape_a": (2, 3), "subscripts": "ij->...ij"}, # do nothing + {"shape_a": (2, 3), "subscripts": "ij...->ij"}, # do nothing + {"shape_a": (2, 3), "subscripts": "i...j->ij"}, # do nothing + {"shape_a": (), "subscripts": ""}, # do nothing + {"shape_a": (), "subscripts": "->"}, # do nothing + ) +) +class TestEinSumUnaryOperation: + @testing.for_all_dtypes(no_bool=False) + @testing.numpy_cupy_allclose( + rtol={numpy.float16: 1e-1, "default": 1e-7}, contiguous_check=False + ) + def test_einsum_unary(self, xp, dtype): + a = testing.shaped_arange(self.shape_a, xp, dtype) + out = xp.einsum(self.subscripts, a) + if xp is not numpy: + optimized_out = xp.einsum(self.subscripts, a, optimize=True) + testing.assert_allclose(optimized_out, out) + return out + + @pytest.mark.skip("view is not supported") + @testing.for_all_dtypes(no_bool=False) + @testing.numpy_cupy_equal() + def test_einsum_unary_views(self, xp, dtype): + a = testing.shaped_arange(self.shape_a, xp, dtype) + b = xp.einsum(self.subscripts, a) + + return b.ndim == 0 or b.base is a + + @testing.for_all_dtypes_combination( + ["dtype_a", "dtype_out"], no_bool=False, no_complex=True + ) # avoid ComplexWarning + @testing.numpy_cupy_allclose( + rtol={numpy.float16: 1e-1, "default": 1e-7}, contiguous_check=False + ) + def test_einsum_unary_dtype(self, xp, dtype_a, dtype_out): + if not numpy.can_cast(dtype_a, dtype_out): + pytest.skip() + a = testing.shaped_arange(self.shape_a, xp, dtype_a) + return xp.einsum(self.subscripts, a, dtype=dtype_out) + + +class TestEinSumUnaryOperationWithScalar: + @pytest.mark.skip("All operands are scalar.") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_scalar_int(self, xp, dtype): return xp.asarray(xp.einsum("->", 2, dtype=dtype)) + @pytest.mark.skip("All operands are scalar.") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_scalar_float(self, xp, dtype): return xp.asarray(xp.einsum("", 2.0, dtype=dtype)) -class TestEinSumBinaryOperationWithScalar(unittest.TestCase): +@testing.parameterize( + *augment_einsum_testcases( + # dot vecvec + {"shape_a": (3,), "shape_b": (3,), "subscripts": "i,i"}, + # outer + {"shape_a": (2,), "shape_b": (3,), "subscripts": "i,j"}, + # dot matvec + {"shape_a": (2, 3), "shape_b": (3,), "subscripts": "ij,j"}, + {"shape_a": (2, 3), "shape_b": (2,), "subscripts": "ij,i"}, + # dot matmat + {"shape_a": (2, 3), "shape_b": (3, 4), "subscripts": "ij,jk"}, + # tensordot + { + "shape_a": (3, 4, 2), + "shape_b": (4, 3, 2), + "subscripts": "ijk, jil -> kl", + }, + { + "shape_a": (3, 4, 2), + "shape_b": (4, 2, 3), + "subscripts": "i...,...k->ki...", + }, + { + "shape_a": (3, 4, 2), + "shape_b": (4, 3, 2), + "subscripts": "ij...,ji...->i...", + }, + # trace and tensordot and diagonal + { + "shape_a": (2, 3, 2, 4), + "shape_b": (3, 2, 2), + "subscripts": "ijil,jkk->kj", + }, + { + "shape_a": (2, 4, 2, 3), + "shape_b": (3, 2, 4), + "subscripts": "i...ij,ji...->...j", + }, + # broadcast + { + "shape_a": (2, 3, 4), + "shape_b": (3,), + "subscripts": "ij...,j...->ij...", + }, + { + "shape_a": (2, 3, 4), + "shape_b": (3,), + "subscripts": "ij...,...j->ij...", + }, + {"shape_a": (2, 3, 4), "shape_b": (3,), "subscripts": "ij...,j->ij..."}, + { + "shape_a": (4, 3), + "shape_b": (3, 2), + "subscripts": "ik...,k...->i...", + }, + { + "shape_a": (4, 3), + "shape_b": (3, 2), + "subscripts": "ik...,...kj->i...j", + }, + {"shape_a": (4, 3), "shape_b": (3, 2), "subscripts": "...k,kj"}, + {"shape_a": (4, 3), "shape_b": (3, 2), "subscripts": "ik,k...->i..."}, + {"shape_a": (2, 3, 4, 5), "shape_b": (4,), "subscripts": "ijkl,k"}, + {"shape_a": (2, 3, 4, 5), "shape_b": (4,), "subscripts": "...kl,k"}, + {"shape_a": (2, 3, 4, 5), "shape_b": (4,), "subscripts": "...kl,k..."}, + { + "shape_a": (1, 1, 1, 2, 3, 2), + "shape_b": (2, 3, 2, 2), + "subscripts": "...lmn,lmno->...o", + }, + ) +) +class TestEinSumBinaryOperation: + @testing.for_all_dtypes_combination( + ["dtype_a", "dtype_b"], no_bool=False, no_float16=False + ) + @testing.numpy_cupy_allclose( + type_check=has_support_aspect64(), contiguous_check=False + ) + def test_einsum_binary(self, xp, dtype_a, dtype_b): + a = testing.shaped_arange(self.shape_a, xp, dtype_a) + b = testing.shaped_arange(self.shape_b, xp, dtype_b) + # casting should be added for dpnp to allow cast int64 to float32 + # when default float type is float32 + casting = "safe" if has_support_aspect64() else "unsafe" + return xp.einsum(self.subscripts, a, b, casting=casting) + + +class TestEinSumBinaryOperationWithScalar: @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) def test_scalar_1(self, xp, dtype): @@ -267,6 +490,99 @@ def test_scalar_2(self, xp, dtype): return xp.asarray(xp.einsum("i,->", a, 4)) +@testing.parameterize( + *augment_einsum_testcases( + { + "shape_a": (2, 3), + "shape_b": (3, 4), + "shape_c": (4, 5), + "subscripts": "ij,jk,kl", + }, + { + "shape_a": (2, 4), + "shape_b": (2, 3), + "shape_c": (2,), + "subscripts": "ij,ik,i->ijk", + }, + { + "shape_a": (2, 4), + "shape_b": (3, 2), + "shape_c": (2,), + "subscripts": "ij,ki,i->jk", + }, + { + "shape_a": (2, 3, 4), + "shape_b": (2,), + "shape_c": (3, 4, 2), + "subscripts": "i...,i,...i->...i", + }, + { + "shape_a": (2, 3, 4), + "shape_b": (4, 3), + "shape_c": (3, 3, 4), + "subscripts": "a...,...b,c...->abc...", + }, + { + "shape_a": (2, 3, 4), + "shape_b": (3, 4), + "shape_c": (3, 3, 4), + "subscripts": "a...,...,c...->ac...", + }, + { + "shape_a": (3, 3, 4), + "shape_b": (4, 3), + "shape_c": (2, 3, 4), + "subscripts": "a...,...b,c...->abc...", + }, + { + "shape_a": (3, 3, 4), + "shape_b": (3, 4), + "shape_c": (2, 3, 4), + "subscripts": "a...,...,c...->ac...", + }, + ) +) +class TestEinSumTernaryOperation: + @testing.for_all_dtypes_combination( + ["dtype_a", "dtype_b", "dtype_c"], no_bool=False, no_float16=False + ) + @testing.numpy_cupy_allclose( + type_check=has_support_aspect64(), contiguous_check=False + ) + def test_einsum_ternary(self, xp, dtype_a, dtype_b, dtype_c): + a = testing.shaped_arange(self.shape_a, xp, dtype_a) + b = testing.shaped_arange(self.shape_b, xp, dtype_b) + c = testing.shaped_arange(self.shape_c, xp, dtype_c) + + try: + # casting should be added for dpnp to allow cast int64 to float32 + # when default float type is float32 + casting = "safe" if has_support_aspect64() else "unsafe" + out = xp.einsum( + self.subscripts, a, b, c, optimize=False, casting=casting + ) + except TypeError: + assert xp is numpy + out = xp.einsum(self.subscripts, a, b, c) + + if xp is not numpy: # Avoid numpy issues #11059, #11060 + for optimize in [ + True, # "greedy" + "optimal", + ["einsum_path", (0, 1), (0, 1)], + ["einsum_path", (0, 2), (0, 1)], + ["einsum_path", (1, 2), (0, 1)], + ]: + # casting should be added for dpnp to allow cast int64 to float32 + # when default float type is float32 + casting = "safe" if has_support_aspect64() else "unsafe" + optimized_out = xp.einsum( + self.subscripts, a, b, c, optimize=optimize, casting=casting + ) + testing.assert_allclose(optimized_out, out) + return out + + @testing.parameterize( *( [ @@ -292,38 +608,36 @@ def test_scalar_2(self, xp, dtype): ) ) ) -class TestEinSumLarge(unittest.TestCase): - def setUp(self): - chars = "abcdefghij" - sizes = numpy.array([2, 3, 4, 5, 4, 3, 2, 6, 5, 4, 3]) - size_dict = {} - for size, char in zip(sizes, chars): - size_dict[char] = size - - # Builds views based off initial operands - string = self.subscript - operands = [string] - terms = string.split("->")[0].split(",") - for term in terms: - dims = [size_dict[x] for x in term] - operands.append(numpy.ones(*dims)) - - self.operands = operands - - @pytest.mark.usefixtures("allow_fall_back_on_numpy") +class TestEinSumLarge: + chars = "abcdefghij" + sizes = (2, 3, 4, 5, 4, 3, 2, 6, 5, 4, 3) + size_dict = {} + for size, char in zip(sizes, chars): + size_dict[char] = size + + @pytest.fixture() + def shapes(self): + size_dict = self.size_dict + terms = self.subscript.split("->")[0].split(",") + return tuple([tuple([size_dict[x] for x in term]) for term in terms]) + @testing.numpy_cupy_allclose( - type_check=has_support_aspect64(), contiguous_check=False + rtol=2e-6, type_check=has_support_aspect64(), contiguous_check=False ) - def test_einsum(self, xp): + def test_einsum(self, xp, shapes): + arrays = [ + testing.shaped_random(shape, xp, cupy.default_float_type(), scale=1) + for shape in shapes + ] # TODO(kataoka): support memory efficient cupy.einsum with warnings.catch_warnings(record=True) as ws: - # I hope there's no problem with np.einsum for these cases... - out = xp.einsum(*self.operands, optimize=self.opt) + # I hope there"s no problem with np.einsum for these cases... + out = xp.einsum(self.subscript, *arrays, optimize=self.opt) if xp is not numpy and isinstance( self.opt, tuple ): # with memory limit for w in ws: - self.assertIn("memory", str(w.message)) + assert "memory" in str(w.message) else: - self.assertEqual(len(ws), 0) + assert len(ws) == 0 return out