From 1929d69dd61d3449c6c06ee98ed6df3291818855 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 26 Jun 2024 13:03:55 +0200 Subject: [PATCH 01/11] Update manipulation functions --- dpnp/dpnp_iface_manipulation.py | 63 +++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index 056ac7907208..29ab1b52f758 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -39,6 +39,7 @@ import dpctl.tensor as dpt +import dpctl.utils as dpu import numpy from numpy.core.numeric import normalize_axis_index @@ -668,6 +669,8 @@ def concatenate( usm_arrays = [dpnp.get_usm_ndarray(x) for x in arrays] usm_res = dpt.concat(usm_arrays, axis=axis) + + dpu.SequentialOrderManager[usm_res.sycl_queue].wait() res = dpnp_array._create_from_usm_ndarray(usm_res) if dtype is not None: res = res.astype(dtype, casting=casting, copy=False) @@ -907,10 +910,11 @@ def expand_dims(a, axis): """ - usm_array = dpnp.get_usm_ndarray(a) - return dpnp_array._create_from_usm_ndarray( - dpt.expand_dims(usm_array, axis=axis) - ) + usm_a = dpnp.get_usm_ndarray(a) + usm_res = dpt.expand_dims(usm_a, axis=axis) + + dpu.SequentialOrderManager[usm_res.sycl_queue].wait() + return dpnp_array._create_from_usm_ndarray(usm_res) def flip(m, axis=None): @@ -1293,8 +1297,10 @@ def repeat(a, repeats, axis=None): usm_arr = dpnp.get_usm_ndarray(a.flatten()) else: usm_arr = dpnp.get_usm_ndarray(a) - usm_arr = dpt.repeat(usm_arr, rep, axis=axis) - return dpnp_array._create_from_usm_ndarray(usm_arr) + usm_res = dpt.repeat(usm_arr, rep, axis=axis) + + dpu.SequentialOrderManager[usm_res.sycl_queue].wait() + return dpnp_array._create_from_usm_ndarray(usm_res) def reshape(a, /, newshape, order="C", copy=None): @@ -1369,9 +1375,11 @@ def reshape(a, /, newshape, order="C", copy=None): elif order not in "cfCF": raise ValueError(f"order must be one of 'C' or 'F' (got {order})") - usm_arr = 
dpnp.get_usm_ndarray(a) - usm_arr = dpt.reshape(usm_arr, shape=newshape, order=order, copy=copy) - return dpnp_array._create_from_usm_ndarray(usm_arr) + usm_a = dpnp.get_usm_ndarray(a) + usm_res = dpt.reshape(usm_a, shape=newshape, order=order, copy=copy) + + dpu.SequentialOrderManager[usm_res.sycl_queue].wait() + return dpnp_array._create_from_usm_ndarray(usm_res) def result_type(*arrays_and_dtypes): @@ -1478,10 +1486,12 @@ def roll(x, shift, axis=None): """ if axis is None: return roll(x.reshape(-1), shift, 0).reshape(x.shape) - usm_array = dpnp.get_usm_ndarray(x) - return dpnp_array._create_from_usm_ndarray( - dpt.roll(usm_array, shift=shift, axis=axis) - ) + + usm_x = dpnp.get_usm_ndarray(x) + usm_res = dpt.roll(usm_x, shift=shift, axis=axis) + + dpu.SequentialOrderManager[usm_res.sycl_queue].wait() + return dpnp_array._create_from_usm_ndarray(usm_res) def rollaxis(x, axis, start=0): @@ -1628,10 +1638,11 @@ def squeeze(a, /, axis=None): """ - usm_array = dpnp.get_usm_ndarray(a) - return dpnp_array._create_from_usm_ndarray( - dpt.squeeze(usm_array, axis=axis) - ) + usm_a = dpnp.get_usm_ndarray(a) + usm_res = dpt.squeeze(usm_a, axis=axis) + + dpu.SequentialOrderManager[usm_res.sycl_queue].wait() + return dpnp_array._create_from_usm_ndarray(usm_res) def stack(arrays, /, *, axis=0, out=None, dtype=None, casting="same_kind"): @@ -1709,6 +1720,8 @@ def stack(arrays, /, *, axis=0, out=None, dtype=None, casting="same_kind"): usm_arrays = [dpnp.get_usm_ndarray(x) for x in arrays] usm_res = dpt.stack(usm_arrays, axis=axis) + + dpu.SequentialOrderManager[usm_res.sycl_queue].wait() res = dpnp_array._create_from_usm_ndarray(usm_res) if dtype is not None: res = res.astype(dtype, casting=casting, copy=False) @@ -1767,10 +1780,11 @@ def swapaxes(a, axis1, axis2): """ - usm_array = dpnp.get_usm_ndarray(a) - return dpnp_array._create_from_usm_ndarray( - dpt.swapaxes(usm_array, axis1=axis1, axis2=axis2) - ) + usm_a = dpnp.get_usm_ndarray(a) + usm_res = dpt.swapaxes(usm_a, 
axis1=axis1, axis2=axis2) + + dpu.SequentialOrderManager[usm_res.sycl_queue].wait() + return dpnp_array._create_from_usm_ndarray(usm_res) # pylint: disable=invalid-name @@ -1848,8 +1862,11 @@ def tile(A, reps): """ - usm_array = dpnp.get_usm_ndarray(A) - return dpnp_array._create_from_usm_ndarray(dpt.tile(usm_array, reps)) + usm_a = dpnp.get_usm_ndarray(A) + usm_res = dpt.tile(usm_a, reps) + + dpu.SequentialOrderManager[usm_res.sycl_queue].wait() + return dpnp_array._create_from_usm_ndarray(usm_res) def transpose(a, axes=None): From 553a02dd41baac3cb2eeb451e80aca0eac920678 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 26 Jun 2024 15:20:44 +0200 Subject: [PATCH 02/11] Update functions from the array creation container --- dpnp/dpnp_container.py | 11 ++++++ dpnp/dpnp_iface_arraycreation.py | 60 ++++++++++++++++++++++---------- 2 files changed, 53 insertions(+), 18 deletions(-) diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py index 5322df3324b4..0b658484005b 100644 --- a/dpnp/dpnp_container.py +++ b/dpnp/dpnp_container.py @@ -81,6 +81,7 @@ def arange( sycl_queue=sycl_queue_normalized, ) + dpu.SequentialOrderManager[array_obj.sycl_queue].wait() return dpnp_array(array_obj.shape, buffer=array_obj) @@ -133,6 +134,7 @@ def asarray( if array_obj is x1_obj and isinstance(x1, dpnp_array): return x1 + dpu.SequentialOrderManager[array_obj.sycl_queue].wait() return dpnp_array(array_obj.shape, buffer=array_obj, order=order) @@ -142,6 +144,7 @@ def copy(x1, /, *, order="K"): order = "K" array_obj = dpt.copy(dpnp.get_usm_ndarray(x1), order=order) + dpu.SequentialOrderManager[array_obj.sycl_queue].wait() return dpnp_array(array_obj.shape, buffer=array_obj, order="K") @@ -203,6 +206,7 @@ def eye( usm_type=usm_type, sycl_queue=sycl_queue_normalized, ) + dpu.SequentialOrderManager[array_obj.sycl_queue].wait() return dpnp_array(array_obj.shape, buffer=array_obj, order=order) @@ -237,6 +241,7 @@ def full( usm_type=usm_type, sycl_queue=sycl_queue_normalized, ) + 
dpu.SequentialOrderManager[array_obj.sycl_queue].wait() return dpnp_array(array_obj.shape, buffer=array_obj, order=order) @@ -268,6 +273,7 @@ def linspace( sycl_queue=sycl_queue_normalized, endpoint=endpoint, ) + dpu.SequentialOrderManager[array_obj.sycl_queue].wait() return dpnp_array(array_obj.shape, buffer=array_obj) @@ -296,18 +302,21 @@ def ones( usm_type=usm_type, sycl_queue=sycl_queue_normalized, ) + dpu.SequentialOrderManager[array_obj.sycl_queue].wait() return dpnp_array(array_obj.shape, buffer=array_obj, order=order) def tril(x1, /, *, k=0): """Creates `dpnp_array` as lower triangular part of an input array.""" array_obj = dpt.tril(dpnp.get_usm_ndarray(x1), k=k) + dpu.SequentialOrderManager[array_obj.sycl_queue].wait() return dpnp_array(array_obj.shape, buffer=array_obj, order="K") def triu(x1, /, *, k=0): """Creates `dpnp_array` as upper triangular part of an input array.""" array_obj = dpt.triu(dpnp.get_usm_ndarray(x1), k=k) + dpu.SequentialOrderManager[array_obj.sycl_queue].wait() return dpnp_array(array_obj.shape, buffer=array_obj, order="K") @@ -336,4 +345,6 @@ def zeros( usm_type=usm_type, sycl_queue=sycl_queue_normalized, ) + # TODO: uncomment once dpctl implements asynchronous call + # dpu.SequentialOrderManager[array_obj.sycl_queue].wait() return dpnp_array(array_obj.shape, buffer=array_obj, order=order) diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index 5cf63ea0fca0..f51d3297002e 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -40,6 +40,8 @@ import operator +import dpctl.tensor as dpt +import dpctl.utils as dpu import numpy import dpnp @@ -51,6 +53,10 @@ dpnp_logspace, dpnp_nd_grid, ) +from .dpnp_array import dpnp_array + +# pylint: disable=no-name-in-module +from .dpnp_utils import get_usm_allocations, map_dtype_to_device __all__ = [ "arange", @@ -2720,21 +2726,30 @@ def meshgrid(*xi, copy=True, sparse=False, indexing="xy"): "Unrecognized indexing keyword value, 
expecting 'xy' or 'ij'." ) + if ndim < 1: + return [] + s0 = (1,) * ndim output = [ - dpnp.reshape(x, s0[:i] + (-1,) + s0[i + 1 :]) for i, x in enumerate(xi) + dpt.reshape(dpnp.get_usm_ndarray(x), s0[:i] + (-1,) + s0[i + 1 :]) + for i, x in enumerate(xi) ] + # input arrays must be allocated on the same queue + _, exec_q = get_usm_allocations(output) + if indexing == "xy" and ndim > 1: - output[0] = output[0].reshape((1, -1) + s0[2:]) - output[1] = output[1].reshape((-1, 1) + s0[2:]) + output[0] = dpt.reshape(output[0], (1, -1) + s0[2:]) + output[1] = dpt.reshape(output[1], (-1, 1) + s0[2:]) if not sparse: - output = dpnp.broadcast_arrays(*output) + output = dpt.broadcast_arrays(*output) if copy: - output = [x.copy() for x in output] + output = [dpt.copy(x) for x in output] + dpu.SequentialOrderManager[exec_q].wait() + output = [dpnp_array._create_from_usm_ndarray(x) for x in output] return output @@ -3261,7 +3276,10 @@ def tri( _dtype = dpnp.default_float_type() if dtype in (dpnp.float, None) else dtype - m = dpnp.ones( + if usm_type is None: + usm_type = "device" + + m = dpt.ones( (N, M), dtype=_dtype, device=device, @@ -3469,28 +3487,34 @@ def vander( [125, 25, 5, 1]]), Device(level_zero:gpu:0), 'host') """ - x = dpnp.asarray(x, device=device, usm_type=usm_type, sycl_queue=sycl_queue) + if dpnp.is_supported_array_type(x): + x = dpnp.get_usm_ndarray(x) + usm_x = dpt.asarray( + x, device=device, usm_type=usm_type, sycl_queue=sycl_queue + ) + + x_sycl_queue = usm_x.sycl_queue + x_usm_type = usm_x.usm_type if N is not None and not isinstance(N, int): raise TypeError(f"An integer is required, but got {type(N)}") - if x.ndim != 1: + if usm_x.ndim != 1: raise ValueError("`x` must be a one-dimensional array or sequence.") if N is None: - N = x.size + N = usm_x.size + + _dtype = numpy.promote_types(usm_x.dtype, int) + _dtype = map_dtype_to_device(_dtype, x_sycl_queue.sycl_device) + m = dpnp.empty_like(usm_x, shape=(usm_x.size, N), dtype=_dtype) - _dtype = int if x.dtype 
== bool else x.dtype - m = empty( - (x.size, N), - dtype=_dtype, - usm_type=x.usm_type, - sycl_queue=x.sycl_queue, - ) tmp = m[:, ::-1] if not increasing else m dpnp.power( - x.reshape(-1, 1), - dpnp.arange(N, dtype=_dtype, sycl_queue=x.sycl_queue), + dpt.reshape(usm_x, (-1, 1)), + dpt.arange( + N, dtype=_dtype, usm_type=x_usm_type, sycl_queue=x_sycl_queue + ), out=tmp, ) return m From 5e5f895485659ed99c909ba484332042ed2974dc Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 26 Jun 2024 16:31:36 +0200 Subject: [PATCH 03/11] Update dpnp array methods --- dpnp/dpnp_array.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index fd2d06f74285..c25bba998b31 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -25,6 +25,7 @@ # ***************************************************************************** import dpctl.tensor as dpt +import dpctl.utils as dpu import dpnp @@ -258,6 +259,8 @@ def __getitem__(self, key): res = self.__new__(dpnp_array) res._array_obj = item + if self._array_obj.usm_data is not res._array_obj.usm_data: + dpu.SequentialOrderManager[self.sycl_queue].wait() return res def __gt__(self, other): @@ -454,6 +457,7 @@ def __setitem__(self, key, val): val = val.get_array() self._array_obj.__setitem__(key, val) + dpu.SequentialOrderManager[self.sycl_queue].wait() # '__setstate__', # '__sizeof__', From 51746d19ed52cf9a2c1cd24cfa3548837be2d540 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 28 Jun 2024 15:36:10 +0200 Subject: [PATCH 04/11] Implement backward compatible solution --- dpnp/dpnp_array.py | 5 ++--- dpnp/dpnp_container.py | 20 ++++++++++---------- dpnp/dpnp_iface.py | 15 +++++++++++++++ dpnp/dpnp_iface_arraycreation.py | 5 ++--- dpnp/dpnp_iface_manipulation.py | 21 +++++++++++---------- 5 files changed, 40 insertions(+), 26 deletions(-) diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index c25bba998b31..d9936872a890 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ 
-25,7 +25,6 @@ # ***************************************************************************** import dpctl.tensor as dpt -import dpctl.utils as dpu import dpnp @@ -260,7 +259,7 @@ def __getitem__(self, key): res._array_obj = item if self._array_obj.usm_data is not res._array_obj.usm_data: - dpu.SequentialOrderManager[self.sycl_queue].wait() + dpnp.synchronize_array_data(self) return res def __gt__(self, other): @@ -457,7 +456,7 @@ def __setitem__(self, key, val): val = val.get_array() self._array_obj.__setitem__(key, val) - dpu.SequentialOrderManager[self.sycl_queue].wait() + dpnp.synchronize_array_data(self) # '__setstate__', # '__sizeof__', diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py index 0b658484005b..5b17f9e87151 100644 --- a/dpnp/dpnp_container.py +++ b/dpnp/dpnp_container.py @@ -81,7 +81,7 @@ def arange( sycl_queue=sycl_queue_normalized, ) - dpu.SequentialOrderManager[array_obj.sycl_queue].wait() + dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj) @@ -134,7 +134,7 @@ def asarray( if array_obj is x1_obj and isinstance(x1, dpnp_array): return x1 - dpu.SequentialOrderManager[array_obj.sycl_queue].wait() + dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj, order=order) @@ -144,7 +144,7 @@ def copy(x1, /, *, order="K"): order = "K" array_obj = dpt.copy(dpnp.get_usm_ndarray(x1), order=order) - dpu.SequentialOrderManager[array_obj.sycl_queue].wait() + dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj, order="K") @@ -206,7 +206,7 @@ def eye( usm_type=usm_type, sycl_queue=sycl_queue_normalized, ) - dpu.SequentialOrderManager[array_obj.sycl_queue].wait() + dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj, order=order) @@ -241,7 +241,7 @@ def full( usm_type=usm_type, sycl_queue=sycl_queue_normalized, ) - dpu.SequentialOrderManager[array_obj.sycl_queue].wait() + 
dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj, order=order) @@ -273,7 +273,7 @@ def linspace( sycl_queue=sycl_queue_normalized, endpoint=endpoint, ) - dpu.SequentialOrderManager[array_obj.sycl_queue].wait() + dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj) @@ -302,21 +302,21 @@ def ones( usm_type=usm_type, sycl_queue=sycl_queue_normalized, ) - dpu.SequentialOrderManager[array_obj.sycl_queue].wait() + dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj, order=order) def tril(x1, /, *, k=0): """Creates `dpnp_array` as lower triangular part of an input array.""" array_obj = dpt.tril(dpnp.get_usm_ndarray(x1), k=k) - dpu.SequentialOrderManager[array_obj.sycl_queue].wait() + dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj, order="K") def triu(x1, /, *, k=0): """Creates `dpnp_array` as upper triangular part of an input array.""" array_obj = dpt.triu(dpnp.get_usm_ndarray(x1), k=k) - dpu.SequentialOrderManager[array_obj.sycl_queue].wait() + dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj, order="K") @@ -346,5 +346,5 @@ def zeros( sycl_queue=sycl_queue_normalized, ) # TODO: uncomment once dpctl implements asynchronous call - # dpu.SequentialOrderManager[array_obj.sycl_queue].wait() + # dpnp.synchronize_array_data(array_obj) return dpnp_array(array_obj.shape, buffer=array_obj, order=order) diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 49e7b41c01c9..8e5304cd9eb0 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -42,6 +42,7 @@ import dpctl import dpctl.tensor as dpt +import dpctl.utils as dpu import numpy from dpctl.tensor._device import normalize_queue_device @@ -69,6 +70,7 @@ "get_usm_ndarray_or_scalar", "is_supported_array_or_scalar", "is_supported_array_type", + "synchronize_array_data", ] from dpnp import float64, isscalar @@ -699,3 +701,16 
@@ def is_supported_array_type(a): """ return isinstance(a, (dpnp_array, dpt.usm_ndarray)) + + +def synchronize_array_data(a): + """ + The dpctl interface was reworked to make asynchronous execution. + That function makes a synchronization call to ensure array data is valid + before exit from dpnp interface function. + + """ + + if hasattr(dpu, "SequentialOrderManager"): + check_supported_arrays_type(a) + dpu.SequentialOrderManager[a.sycl_queue].wait() diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index f51d3297002e..e80f50fea72f 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -41,7 +41,6 @@ import operator import dpctl.tensor as dpt -import dpctl.utils as dpu import numpy import dpnp @@ -2736,7 +2735,7 @@ def meshgrid(*xi, copy=True, sparse=False, indexing="xy"): ] # input arrays must be allocated on the same queue - _, exec_q = get_usm_allocations(output) + _, _ = get_usm_allocations(output) if indexing == "xy" and ndim > 1: output[0] = dpt.reshape(output[0], (1, -1) + s0[2:]) @@ -2748,7 +2747,7 @@ def meshgrid(*xi, copy=True, sparse=False, indexing="xy"): if copy: output = [dpt.copy(x) for x in output] - dpu.SequentialOrderManager[exec_q].wait() + dpnp.synchronize_array_data(output[0]) output = [dpnp_array._create_from_usm_ndarray(x) for x in output] return output diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index 28165aab4ba3..a4b7352d4e64 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -39,7 +39,6 @@ import dpctl.tensor as dpt -import dpctl.utils as dpu import numpy from numpy.core.numeric import normalize_axis_index @@ -670,13 +669,14 @@ def concatenate( usm_arrays = [dpnp.get_usm_ndarray(x) for x in arrays] usm_res = dpt.concat(usm_arrays, axis=axis) - dpu.SequentialOrderManager[usm_res.sycl_queue].wait() res = dpnp_array._create_from_usm_ndarray(usm_res) if dtype is not None: res = res.astype(dtype, 
casting=casting, copy=False) elif out is not None: dpnp.copyto(out, res, casting=casting) return out + + dpnp.synchronize_array_data(res) return res @@ -913,7 +913,7 @@ def expand_dims(a, axis): usm_a = dpnp.get_usm_ndarray(a) usm_res = dpt.expand_dims(usm_a, axis=axis) - dpu.SequentialOrderManager[usm_res.sycl_queue].wait() + dpnp.synchronize_array_data(usm_res) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -1304,7 +1304,7 @@ def repeat(a, repeats, axis=None): usm_arr = dpnp.get_usm_ndarray(a) usm_res = dpt.repeat(usm_arr, repeats, axis=axis) - dpu.SequentialOrderManager[usm_res.sycl_queue].wait() + dpnp.synchronize_array_data(usm_res) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -1383,7 +1383,7 @@ def reshape(a, /, newshape, order="C", copy=None): usm_a = dpnp.get_usm_ndarray(a) usm_res = dpt.reshape(usm_a, shape=newshape, order=order, copy=copy) - dpu.SequentialOrderManager[usm_res.sycl_queue].wait() + dpnp.synchronize_array_data(usm_res) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -1495,7 +1495,7 @@ def roll(x, shift, axis=None): usm_x = dpnp.get_usm_ndarray(x) usm_res = dpt.roll(usm_x, shift=shift, axis=axis) - dpu.SequentialOrderManager[usm_res.sycl_queue].wait() + dpnp.synchronize_array_data(usm_res) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -1646,7 +1646,7 @@ def squeeze(a, /, axis=None): usm_a = dpnp.get_usm_ndarray(a) usm_res = dpt.squeeze(usm_a, axis=axis) - dpu.SequentialOrderManager[usm_res.sycl_queue].wait() + dpnp.synchronize_array_data(usm_res) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -1726,13 +1726,14 @@ def stack(arrays, /, *, axis=0, out=None, dtype=None, casting="same_kind"): usm_arrays = [dpnp.get_usm_ndarray(x) for x in arrays] usm_res = dpt.stack(usm_arrays, axis=axis) - dpu.SequentialOrderManager[usm_res.sycl_queue].wait() res = dpnp_array._create_from_usm_ndarray(usm_res) if dtype is not None: res = res.astype(dtype, casting=casting, copy=False) elif out is not None: dpnp.copyto(out, 
res, casting=casting) return out + + dpnp.synchronize_array_data(res) return res @@ -1788,7 +1789,7 @@ def swapaxes(a, axis1, axis2): usm_a = dpnp.get_usm_ndarray(a) usm_res = dpt.swapaxes(usm_a, axis1=axis1, axis2=axis2) - dpu.SequentialOrderManager[usm_res.sycl_queue].wait() + dpnp.synchronize_array_data(usm_res) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -1870,7 +1871,7 @@ def tile(A, reps): usm_a = dpnp.get_usm_ndarray(A) usm_res = dpt.tile(usm_a, reps) - dpu.SequentialOrderManager[usm_res.sycl_queue].wait() + dpnp.synchronize_array_data(usm_res) return dpnp_array._create_from_usm_ndarray(usm_res) From e679694303592b5e5481ae657836123455819fdd Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Fri, 28 Jun 2024 16:52:08 +0200 Subject: [PATCH 05/11] dpnp.meshgrid has to follow compute-follows-data (CFD) and prohibit input arrays allocated on different SYCL queues --- tests/test_sycl_queue.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 378ecaf9b197..f7c70320dbfb 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -373,18 +373,13 @@ def test_array_creation_load_txt(device): @pytest.mark.parametrize( - "device_x", - valid_devices, - ids=[device.filter_string for device in valid_devices], -) -@pytest.mark.parametrize( - "device_y", + "device", valid_devices, ids=[device.filter_string for device in valid_devices], ) -def test_meshgrid(device_x, device_y): - x = dpnp.arange(100, device=device_x) - y = dpnp.arange(100, device=device_y) +def test_meshgrid(device): + x = dpnp.arange(100, device=device) + y = dpnp.arange(100, device=device) z = dpnp.meshgrid(x, y) assert_sycl_queue_equal(z[0].sycl_queue, x.sycl_queue) assert_sycl_queue_equal(z[1].sycl_queue, y.sycl_queue) From 6f33b3d40ccb01d47ba8424aab9afa1fd3a48ad5 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 1 Jul 2024 14:57:14 +0200 Subject: [PATCH 06/11] updated linspace, logspace and geomspace functions --- 
dpnp/dpnp_algo/dpnp_arraycreation.py | 107 ++++++++++++++------------- dpnp/dpnp_iface_arraycreation.py | 22 +++++- 2 files changed, 73 insertions(+), 56 deletions(-) diff --git a/dpnp/dpnp_algo/dpnp_arraycreation.py b/dpnp/dpnp_algo/dpnp_arraycreation.py index 83cd9da4acf2..aeb77ae34c7e 100644 --- a/dpnp/dpnp_algo/dpnp_arraycreation.py +++ b/dpnp/dpnp_algo/dpnp_arraycreation.py @@ -1,12 +1,13 @@ import math import operator +import dpctl.tensor as dpt import dpctl.utils as dpu import numpy import dpnp -import dpnp.dpnp_container as dpnp_container import dpnp.dpnp_utils as utils +from dpnp.dpnp_array import dpnp_array __all__ = [ "dpnp_geomspace", @@ -16,6 +17,12 @@ ] +def _as_usm_ndarray(a, usm_type, sycl_queue): + if isinstance(a, dpnp_array): + return a.get_array() + return dpt.asarray(a, usm_type=usm_type, sycl_queue=sycl_queue) + + def dpnp_geomspace( start, stop, @@ -40,14 +47,8 @@ def dpnp_geomspace( else: _usm_type = usm_type - if not dpnp.is_supported_array_type(start): - start = dpnp.asarray( - start, usm_type=_usm_type, sycl_queue=sycl_queue_normalized - ) - if not dpnp.is_supported_array_type(stop): - stop = dpnp.asarray( - stop, usm_type=_usm_type, sycl_queue=sycl_queue_normalized - ) + start = _as_usm_ndarray(start, _usm_type, sycl_queue_normalized) + stop = _as_usm_ndarray(stop, _usm_type, sycl_queue_normalized) dt = numpy.result_type(start, stop, float(num)) dt = utils.map_dtype_to_device(dt, sycl_queue_normalized.sycl_device) @@ -57,8 +58,8 @@ def dpnp_geomspace( if dpnp.any(start == 0) or dpnp.any(stop == 0): raise ValueError("Geometric sequence cannot include zero") - out_sign = dpnp.ones( - dpnp.broadcast_arrays(start, stop)[0].shape, + out_sign = dpt.ones( + dpt.broadcast_arrays(start, stop)[0].shape, dtype=dt, usm_type=_usm_type, sycl_queue=sycl_queue_normalized, @@ -72,15 +73,15 @@ def dpnp_geomspace( stop[all_imag] = stop[all_imag].imag out_sign[all_imag] = 1j - both_negative = (dpnp.sign(start) == -1) & (dpnp.sign(stop) == -1) + 
both_negative = (dpt.sign(start) == -1) & (dpt.sign(stop) == -1) if dpnp.any(both_negative): - dpnp.negative(start[both_negative], out=start[both_negative]) - dpnp.negative(stop[both_negative], out=stop[both_negative]) - dpnp.negative(out_sign[both_negative], out=out_sign[both_negative]) + dpt.negative(start[both_negative], out=start[both_negative]) + dpt.negative(stop[both_negative], out=stop[both_negative]) + dpt.negative(out_sign[both_negative], out=out_sign[both_negative]) - log_start = dpnp.log10(start) - log_stop = dpnp.log10(stop) - result = dpnp_logspace( + log_start = dpt.log10(start) + log_stop = dpt.log10(stop) + res = dpnp_logspace( log_start, log_stop, num=num, @@ -92,16 +93,16 @@ def dpnp_geomspace( ) if num > 0: - result[0] = start + res[0] = start if num > 1 and endpoint: - result[-1] = stop + res[-1] = stop - result = out_sign * result + res = out_sign * res if axis != 0: - result = dpnp.moveaxis(result, 0, axis) + res = dpt.moveaxis(res, 0, axis) - return result.astype(dtype, copy=False) + return dpt.astype(res, dtype, copy=False) def dpnp_linspace( @@ -129,14 +130,11 @@ def dpnp_linspace( else: _usm_type = usm_type - if not hasattr(start, "dtype") and not dpnp.isscalar(start): - start = dpnp.asarray( - start, usm_type=_usm_type, sycl_queue=sycl_queue_normalized - ) - if not hasattr(stop, "dtype") and not dpnp.isscalar(stop): - stop = dpnp.asarray( - stop, usm_type=_usm_type, sycl_queue=sycl_queue_normalized - ) + if not dpnp.isscalar(start): + start = _as_usm_ndarray(start, _usm_type, sycl_queue_normalized) + + if not dpnp.isscalar(stop): + stop = _as_usm_ndarray(stop, _usm_type, sycl_queue_normalized) dt = numpy.result_type(start, stop, float(num)) dt = utils.map_dtype_to_device(dt, sycl_queue_normalized.sycl_device) @@ -155,7 +153,7 @@ def dpnp_linspace( if dpnp.isscalar(start) and dpnp.isscalar(stop): # Call linspace() function for scalars. 
- res = dpnp_container.linspace( + usm_res = dpt.linspace( start, stop, num, @@ -167,17 +165,17 @@ def dpnp_linspace( if retstep is True and step_nan is False: step = (stop - start) / step_num else: - _start = dpnp.asarray( + usm_start = dpt.asarray( start, dtype=dt, usm_type=_usm_type, sycl_queue=sycl_queue_normalized, ) - _stop = dpnp.asarray( + usm_stop = dpt.asarray( stop, dtype=dt, usm_type=_usm_type, sycl_queue=sycl_queue_normalized ) - res = dpnp_container.arange( + usm_res = dpt.arange( 0, stop=num, step=1, @@ -187,28 +185,27 @@ def dpnp_linspace( ) if step_nan is False: - step = (_stop - _start) / step_num - res = res.reshape((-1,) + (1,) * step.ndim) - res = res * step + _start + step = (usm_stop - usm_start) / step_num + usm_res = dpt.reshape(usm_res, (-1,) + (1,) * step.ndim, copy=False) + usm_res = usm_res * step + usm_res += usm_start if endpoint and num > 1: - res[-1] = dpnp_container.full(step.shape, _stop) + usm_res[-1] = dpt.full(step.shape, usm_stop) if axis != 0: - res = dpnp.moveaxis(res, 0, axis) + usm_res = dpt.moveaxis(usm_res, 0, axis) if numpy.issubdtype(dtype, dpnp.integer): - dpnp.floor(res, out=res) - - res = res.astype(dtype, copy=False) + dpt.floor(usm_res, out=usm_res) + res = dpt.astype(usm_res, dtype, copy=False) if retstep is True: if dpnp.isscalar(step): - step = dpnp.asarray( + step = dpt.asarray( step, usm_type=res.usm_type, sycl_queue=res.sycl_queue ) return (res, step) - return res @@ -239,12 +236,15 @@ def dpnp_logspace( usm_type = "device" if usm_type_alloc is None else usm_type_alloc else: usm_type = usm_type - start = dpnp.asarray(start, usm_type=usm_type, sycl_queue=sycl_queue) - stop = dpnp.asarray(stop, usm_type=usm_type, sycl_queue=sycl_queue) - base = dpnp.asarray(base, usm_type=usm_type, sycl_queue=sycl_queue) - [start, stop, base] = dpnp.broadcast_arrays(start, stop, base) - base = dpnp.expand_dims(base, axis=axis) + start = _as_usm_ndarray(start, usm_type, sycl_queue) + stop = _as_usm_ndarray(stop, usm_type, 
sycl_queue) + base = _as_usm_ndarray(base, usm_type, sycl_queue) + + [start, stop, base] = dpt.broadcast_arrays(start, stop, base) + base = dpt.expand_dims(base, axis=axis) + + # assume res as not a tuple, because retstep is False res = dpnp_linspace( start, stop, @@ -256,9 +256,10 @@ def dpnp_logspace( axis=axis, ) - if dtype is None: - return dpnp.power(base, res) - return dpnp.power(base, res).astype(dtype, copy=False) + dpt.pow(base, res, out=res) + if dtype is not None: + res = dpt.astype(res, dtype, copy=False) + return res class dpnp_nd_grid: diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index e80f50fea72f..0f74906d6b80 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -2188,7 +2188,7 @@ def geomspace( """ - return dpnp_geomspace( + res = dpnp_geomspace( start, stop, num, @@ -2200,6 +2200,10 @@ def geomspace( axis=axis, ) + dpnp.synchronize_array_data(res) + res = dpnp_array._create_from_usm_ndarray(res) + return res + def identity( n, @@ -2407,7 +2411,7 @@ def linspace( """ - return dpnp_linspace( + res = dpnp_linspace( start, stop, num, @@ -2420,6 +2424,14 @@ def linspace( axis=axis, ) + if isinstance(res, tuple): # (result, step) is returning + dpnp.synchronize_array_data(res[0]) + res = tuple(dpnp_array._create_from_usm_ndarray(x) for x in res) + else: + dpnp.synchronize_array_data(res) + res = dpnp_array._create_from_usm_ndarray(res) + return res + def loadtxt( fname, @@ -2634,7 +2646,7 @@ def logspace( """ - return dpnp_logspace( + res = dpnp_logspace( start, stop, num=num, @@ -2647,6 +2659,10 @@ def logspace( axis=axis, ) + dpnp.synchronize_array_data(res) + res = dpnp_array._create_from_usm_ndarray(res) + return res + # pylint: disable=redefined-outer-name def meshgrid(*xi, copy=True, sparse=False, indexing="xy"): From c207efc80e8ed86d23f1d1cf7a34cc72ef7b9bdd Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 1 Jul 2024 15:55:20 +0200 Subject: [PATCH 07/11] Updated 
elementwise functions and astype --- dpnp/dpnp_algo/dpnp_elementwise_common.py | 125 +++++++++++++--------- dpnp/dpnp_iface.py | 4 +- 2 files changed, 76 insertions(+), 53 deletions(-) diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py index 374981a63031..b13ea56bc329 100644 --- a/dpnp/dpnp_algo/dpnp_elementwise_common.py +++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py @@ -24,6 +24,7 @@ # THE POSSIBILITY OF SUCH DAMAGE. # ***************************************************************************** +import dpctl.tensor as dpt import numpy from dpctl.tensor._elementwise_common import ( BinaryElementwiseFunc, @@ -161,24 +162,27 @@ def __call__( f"Requested function={self.name_} only takes `out` or `dtype`" "as an argument, but both were provided." ) + + if order is None: + order = "K" + elif order in "afkcAFKC": + order = order.upper() else: - if order is None: - order = "K" - elif order in "afkcAFKC": - order = order.upper() - else: - raise ValueError( - "order must be one of 'C', 'F', 'A', or 'K' " - f"(got '{order}')" - ) - if dtype is not None: - x = dpnp.astype(x, dtype=dtype, copy=False) - x_usm = dpnp.get_usm_ndarray(x) - out_usm = None if out is None else dpnp.get_usm_ndarray(out) - res_usm = super().__call__(x_usm, out=out_usm, order=order) - if out is not None and isinstance(out, dpnp_array): - return out - return dpnp_array._create_from_usm_ndarray(res_usm) + raise ValueError( + "order must be one of 'C', 'F', 'A', or 'K' " f"(got '{order}')" + ) + + x_usm = dpnp.get_usm_ndarray(x) + if dtype is not None: + x_usm = dpt.astype(x_usm, dtype, copy=False) + + out_usm = None if out is None else dpnp.get_usm_ndarray(out) + res_usm = super().__call__(x_usm, out=out_usm, order=order) + + dpnp.synchronize_array_data(res_usm) + if out is not None and isinstance(out, dpnp_array): + return out + return dpnp_array._create_from_usm_ndarray(res_usm) class DPNPBinaryFunc(BinaryElementwiseFunc): @@ -311,35 +315,47 @@ def 
__call__( f"Requested function={self.name_} only takes `out` or `dtype`" "as an argument, but both were provided." ) + + if order is None: + order = "K" + elif order in "afkcAFKC": + order = order.upper() else: - if order is None: - order = "K" - elif order in "afkcAFKC": - order = order.upper() - else: - raise ValueError( - "order must be one of 'C', 'F', 'A', or 'K' " - f"(got '{order}')" + raise ValueError( + "order must be one of 'C', 'F', 'A', or 'K' (got '{order}')" + ) + + x1_usm = dpnp.get_usm_ndarray_or_scalar(x1) + x2_usm = dpnp.get_usm_ndarray_or_scalar(x2) + + if dtype is not None: + if dpnp.isscalar(x1): + x1_usm = dpt.asarray( + x1, + dtype=dtype, + sycl_queue=x2.sycl_queue, + usm_type=x2.usm_type, ) - if dtype is not None: - if dpnp.isscalar(x1): - x1 = dpnp.asarray(x1, dtype=dtype) - x2 = dpnp.astype(x2, dtype=dtype, copy=False) - elif dpnp.isscalar(x2): - x1 = dpnp.astype(x1, dtype=dtype, copy=False) - x2 = dpnp.asarray(x2, dtype=dtype) - else: - x1 = dpnp.astype(x1, dtype=dtype, copy=False) - x2 = dpnp.astype(x2, dtype=dtype, copy=False) - - x1_usm = dpnp.get_usm_ndarray_or_scalar(x1) - x2_usm = dpnp.get_usm_ndarray_or_scalar(x2) + x2_usm = dpt.astype(x2_usm, dtype, copy=False) + elif dpnp.isscalar(x2): + x1_usm = dpt.astype(x1_usm, dtype, copy=False) + x2_usm = dpt.asarray( + x2, + dtype=dtype, + sycl_queue=x1.sycl_queue, + usm_type=x1.usm_type, + ) + else: + x1_usm = dpt.astype(x1_usm, dtype, copy=False) + x2_usm = dpt.astype(x2_usm, dtype, copy=False) - out_usm = None if out is None else dpnp.get_usm_ndarray(out) - res_usm = super().__call__(x1_usm, x2_usm, out=out_usm, order=order) - if out is not None and isinstance(out, dpnp_array): - return out - return dpnp_array._create_from_usm_ndarray(res_usm) + out_usm = None if out is None else dpnp.get_usm_ndarray(out) + res_usm = super().__call__(x1_usm, x2_usm, out=out_usm, order=order) + + dpnp.synchronize_array_data(res_usm) + if out is not None and isinstance(out, dpnp_array): + return out + 
return dpnp_array._create_from_usm_ndarray(res_usm) def outer( self, @@ -463,7 +479,7 @@ def __init__( def __call__(self, x, deg=False): res = super().__call__(x) if deg is True: - res = res * (180 / dpnp.pi) + res *= 180 / dpnp.pi return res @@ -513,14 +529,21 @@ def __init__( def __call__(self, x, decimals=0, out=None, dtype=None): if decimals != 0: - if dpnp.issubdtype(x.dtype, dpnp.integer) and dtype is None: - dtype = x.dtype - res = dpnp.true_divide( - dpnp.rint(x * 10**decimals, out=out), 10**decimals, out=out - ) + x_usm = dpnp.get_usm_ndarray(x) + if dpnp.issubdtype(x_usm.dtype, dpnp.integer) and dtype is None: + dtype = x_usm.dtype + + out_usm = None if out is None else dpnp.get_usm_ndarray(out) + x_usm = dpt.round(x_usm * 10**decimals, out=out_usm) + res_usm = dpt.divide(x_usm, 10**decimals, out=out_usm) + if dtype is not None: - res = res.astype(dtype) - return res + res_usm = dpt.astype(res_usm, dtype, copy=False) + + dpnp.synchronize_array_data(res_usm) + if out is not None and isinstance(out, dpnp_array): + return out + return dpnp_array._create_from_usm_ndarray(res_usm) else: return super().__call__(x, out=out, dtype=dtype) diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 8e5304cd9eb0..b3103869e8d3 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -240,10 +240,10 @@ def astype(x1, dtype, order="K", casting="unsafe", copy=True, device=None): x1_obj, dtype, order=order, casting=casting, copy=copy, device=device ) - # return x1 if dpctl returns a zero copy of x1_obj + dpnp.synchronize_array_data(x1) if array_obj is x1_obj and isinstance(x1, dpnp_array): + # return x1 if dpctl returns a zero copy of x1_obj return x1 - return dpnp_array._create_from_usm_ndarray(array_obj) From 2ef8f667a7383a2c79da2dfb9f7e654efaa7a829 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 1 Jul 2024 17:08:11 +0200 Subject: [PATCH 08/11] Updated counting and histogram functions --- dpnp/dpnp_iface_counting.py | 7 ++-- dpnp/dpnp_iface_histograms.py | 66 
+++++++++++++++++++++++++---------- 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/dpnp/dpnp_iface_counting.py b/dpnp/dpnp_iface_counting.py index 8a90601ce8fe..515cad08a06b 100644 --- a/dpnp/dpnp_iface_counting.py +++ b/dpnp/dpnp_iface_counting.py @@ -37,6 +37,8 @@ """ +import dpctl.tensor as dpt + import dpnp __all__ = ["count_nonzero"] @@ -87,5 +89,6 @@ def count_nonzero(a, axis=None, *, keepdims=False): # TODO: might be improved by implementing an extension # with `count_nonzero` kernel - a = dpnp.astype(a, dpnp.bool, copy=False) - return a.sum(axis=axis, dtype=dpnp.intp, keepdims=keepdims) + usm_a = dpnp.get_usm_ndarray(a) + usm_a = dpt.astype(usm_a, dpnp.bool, copy=False) + return dpnp.sum(usm_a, axis=axis, dtype=dpnp.intp, keepdims=keepdims) diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index 1a1b4daf740d..bf3164ab9b6c 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -40,11 +40,17 @@ import operator import warnings +import dpctl.tensor as dpt import dpctl.utils as dpu import numpy import dpnp +from .dpnp_algo.dpnp_arraycreation import ( + dpnp_linspace, +) +from .dpnp_array import dpnp_array + __all__ = [ "digitize", "histogram", @@ -60,7 +66,7 @@ def _ravel_check_a_and_weights(a, weights): """Check input `a` and `weights` arrays, and ravel both.""" # ensure that `a` array has supported type - dpnp.check_supported_arrays_type(a) + a = dpnp.get_usm_ndarray(a) usm_type = a.usm_type # ensure that the array is a "subtractable" dtype @@ -71,11 +77,11 @@ def _ravel_check_a_and_weights(a, weights): RuntimeWarning, stacklevel=3, ) - a = a.astype(numpy.uint8) + a = dpt.astype(a, numpy.uint8) if weights is not None: # check that `weights` array has supported type - dpnp.check_supported_arrays_type(weights) + weights = dpnp.get_usm_ndarray(weights) usm_type = dpu.get_coerced_usm_type([usm_type, weights.usm_type]) # check that arrays have the same allocation queue @@ -86,8 +92,9 @@ def 
_ravel_check_a_and_weights(a, weights): if weights.shape != a.shape: raise ValueError("weights should have the same shape as a.") - weights = weights.ravel() - a = a.ravel() + weights = dpt.reshape(weights, -1) + + a = dpt.reshape(a, -1) return a, weights, usm_type @@ -113,7 +120,7 @@ def _get_outer_edges(a, range): first_edge, last_edge = 0, 1 else: - first_edge, last_edge = a.min(), a.max() + first_edge, last_edge = dpt.min(a), dpt.max(a) if not (dpnp.isfinite(first_edge) and dpnp.isfinite(last_edge)): raise ValueError( f"autodetected range of [{first_edge}, {last_edge}] " @@ -157,9 +164,9 @@ def _get_bin_edges(a, bins, range, usm_type): "a and bins must be allocated on the same SYCL queue" ) - bin_edges = bins + bin_edges = dpnp.get_usm_ndarray(bins) else: - bin_edges = dpnp.asarray( + bin_edges = dpt.asarray( bins, sycl_queue=sycl_queue, usm_type=usm_type ) @@ -183,7 +190,7 @@ def _get_bin_edges(a, bins, range, usm_type): ) # bin edges must be computed - bin_edges = dpnp.linspace( + bin_edges = dpnp_linspace( first_edge, last_edge, n_equal_bins + 1, @@ -204,8 +211,11 @@ def _search_sorted_inclusive(a, v): """ - return dpnp.concatenate( - (a.searchsorted(v[:-1], "left"), a.searchsorted(v[-1:], "right")) + return dpt.concat( + ( + dpt.searchsorted(a, v[:-1], side="left"), + dpt.searchsorted(a, v[-1:], side="right"), + ) ) @@ -297,8 +307,14 @@ def digitize(x, bins, right=False): # Use dpnp.searchsorted directly if bins are increasing return dpnp.searchsorted(bins, x, side=side) + usm_x = dpnp.get_usm_ndarray(x) + usm_bins = dpnp.get_usm_ndarray(bins) + # Reverse bins and adjust indices if bins are decreasing - return bins.size - dpnp.searchsorted(bins[::-1], x, side=side) + usm_res = usm_bins.size - dpt.searchsorted(usm_bins[::-1], usm_x, side=side) + + dpnp.synchronize_array_data(usm_res) + return dpnp_array._create_from_usm_ndarray(usm_res) def histogram(a, bins=10, range=None, density=None, weights=None): @@ -412,26 +428,36 @@ def histogram(a, bins=10, 
range=None, density=None, weights=None): else: # Compute via cumulative histogram if weights is None: - sa = dpnp.sort(a) + sa = dpt.sort(a) cum_n = _search_sorted_inclusive(sa, bin_edges) else: - zero = dpnp.zeros( + zero = dpt.zeros( 1, dtype=ntype, sycl_queue=a.sycl_queue, usm_type=usm_type ) - sorting_index = dpnp.argsort(a) + sorting_index = dpt.argsort(a) sa = a[sorting_index] sw = weights[sorting_index] - cw = dpnp.concatenate((zero, sw.cumsum(dtype=ntype))) + cw = dpt.concat((zero, dpt.cumulative_sum(sw, dtype=ntype))) bin_index = _search_sorted_inclusive(sa, bin_edges) cum_n = cw[bin_index] n = dpnp.diff(cum_n) + # convert bin_edges to dpnp.ndarray + bin_edges = dpnp_array._create_from_usm_ndarray(bin_edges) + if density: # pylint: disable=possibly-used-before-assignment - db = dpnp.diff(bin_edges).astype(dpnp.default_float_type()) - return n / db / n.sum(), bin_edges + db = dpnp.diff(bin_edges) + db = dpt.astype(db.get_array(), dpnp.default_float_type()) + + usm_n = n.get_array() + hist = usm_n / db / dpt.sum(usm_n) + dpnp.synchronize_array_data(hist) + return dpnp_array._create_from_usm_ndarray(hist), bin_edges + + dpnp.synchronize_array_data(n) return n, bin_edges @@ -517,4 +543,6 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): a, weights, usm_type = _ravel_check_a_and_weights(a, weights) bin_edges, _ = _get_bin_edges(a, bins, range, usm_type) - return bin_edges + + dpnp.synchronize_array_data(bin_edges) + return dpnp_array._create_from_usm_ndarray(bin_edges) From 12c7a399735e31d340880cc050d0ca4b2a086043 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 1 Jul 2024 17:27:44 +0200 Subject: [PATCH 09/11] Switched back to use dppy/label/dev for coverage GH action --- .github/workflows/generate_coverage.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml index 1fa71fb479dc..5a0480235a7b 100644 --- 
a/.github/workflows/generate_coverage.yaml +++ b/.github/workflows/generate_coverage.yaml @@ -21,7 +21,7 @@ jobs: env: python-ver: '3.10' - CHANNELS: '-c dppy/label/coverage -c intel -c conda-forge --override-channels' + CHANNELS: '-c dppy/label/dev -c intel -c conda-forge --override-channels' # Install the latest oneAPI compiler to work around an issue INSTALL_ONE_API: 'yes' From fcc66b09c0135d268a16e49be2b2fb221708d83e Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 1 Jul 2024 17:28:37 +0200 Subject: [PATCH 10/11] Removed dpnp_container.linspace since unused --- dpnp/dpnp_container.py | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py index 5b17f9e87151..8f70e015393c 100644 --- a/dpnp/dpnp_container.py +++ b/dpnp/dpnp_container.py @@ -47,7 +47,6 @@ "empty", "eye", "full", - "linspace", "ones", "tril", "triu", @@ -245,38 +244,6 @@ def full( return dpnp_array(array_obj.shape, buffer=array_obj, order=order) -def linspace( - start, - stop, - /, - num, - *, - dtype=None, - device=None, - usm_type="device", - sycl_queue=None, - endpoint=True, -): - """Validate input parameters before passing them into `dpctl.tensor` module""" - dpu.validate_usm_type(usm_type, allow_none=False) - sycl_queue_normalized = dpnp.get_normalized_queue_device( - sycl_queue=sycl_queue, device=device - ) - - """Creates `dpnp_array` with evenly spaced numbers of specified interval.""" - array_obj = dpt.linspace( - start, - stop, - num, - dtype=dtype, - usm_type=usm_type, - sycl_queue=sycl_queue_normalized, - endpoint=endpoint, - ) - dpnp.synchronize_array_data(array_obj) - return dpnp_array(array_obj.shape, buffer=array_obj) - - def ones( shape, *, From 59ac91b04fe63c50d31b1978568d2ee823f08bd0 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 1 Jul 2024 20:18:03 +0200 Subject: [PATCH 11/11] Return dpnp ndarray for linspace, logspace and geomspace internal functions --- dpnp/dpnp_algo/dpnp_arraycreation.py 
| 13 ++++++++----- dpnp/dpnp_iface_arraycreation.py | 4 ---- dpnp/dpnp_iface_histograms.py | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/dpnp/dpnp_algo/dpnp_arraycreation.py b/dpnp/dpnp_algo/dpnp_arraycreation.py index aeb77ae34c7e..b493efac9931 100644 --- a/dpnp/dpnp_algo/dpnp_arraycreation.py +++ b/dpnp/dpnp_algo/dpnp_arraycreation.py @@ -90,7 +90,7 @@ def dpnp_geomspace( dtype=dtype, usm_type=_usm_type, sycl_queue=sycl_queue_normalized, - ) + ).get_array() if num > 0: res[0] = start @@ -102,7 +102,8 @@ def dpnp_geomspace( if axis != 0: res = dpt.moveaxis(res, 0, axis) - return dpt.astype(res, dtype, copy=False) + res = dpt.astype(res, dtype, copy=False) + return dpnp_array._create_from_usm_ndarray(res) def dpnp_linspace( @@ -200,12 +201,14 @@ def dpnp_linspace( dpt.floor(usm_res, out=usm_res) res = dpt.astype(usm_res, dtype, copy=False) + res = dpnp_array._create_from_usm_ndarray(res) + if retstep is True: if dpnp.isscalar(step): step = dpt.asarray( step, usm_type=res.usm_type, sycl_queue=res.sycl_queue ) - return (res, step) + return res, dpnp_array._create_from_usm_ndarray(step) return res @@ -254,12 +257,12 @@ def dpnp_logspace( sycl_queue=sycl_queue, endpoint=endpoint, axis=axis, - ) + ).get_array() dpt.pow(base, res, out=res) if dtype is not None: res = dpt.astype(res, dtype, copy=False) - return res + return dpnp_array._create_from_usm_ndarray(res) class dpnp_nd_grid: diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index 0f74906d6b80..6698f3f782e8 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -2201,7 +2201,6 @@ def geomspace( ) dpnp.synchronize_array_data(res) - res = dpnp_array._create_from_usm_ndarray(res) return res @@ -2426,10 +2425,8 @@ def linspace( if isinstance(res, tuple): # (result, step) is returning dpnp.synchronize_array_data(res[0]) - res = tuple(dpnp_array._create_from_usm_ndarray(x) for x in res) else: dpnp.synchronize_array_data(res) - res = 
dpnp_array._create_from_usm_ndarray(res) return res @@ -2660,7 +2657,6 @@ def logspace( ) dpnp.synchronize_array_data(res) - res = dpnp_array._create_from_usm_ndarray(res) return res diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index bf3164ab9b6c..24c8b6aaf78d 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -198,7 +198,7 @@ def _get_bin_edges(a, bins, range, usm_type): dtype=bin_type, sycl_queue=sycl_queue, usm_type=usm_type, - ) + ).get_array() return bin_edges, (first_edge, last_edge, n_equal_bins) return bin_edges, None