From 1929d69dd61d3449c6c06ee98ed6df3291818855 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Wed, 26 Jun 2024 13:03:55 +0200
Subject: [PATCH 01/11] Update manipulation functions

---
 dpnp/dpnp_iface_manipulation.py | 63 +++++++++++++++++++++------------
 1 file changed, 40 insertions(+), 23 deletions(-)

diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py
index 056ac7907208..29ab1b52f758 100644
--- a/dpnp/dpnp_iface_manipulation.py
+++ b/dpnp/dpnp_iface_manipulation.py
@@ -39,6 +39,7 @@
 
 
 import dpctl.tensor as dpt
+import dpctl.utils as dpu
 import numpy
 from numpy.core.numeric import normalize_axis_index
 
@@ -668,6 +669,8 @@ def concatenate(
 
     usm_arrays = [dpnp.get_usm_ndarray(x) for x in arrays]
     usm_res = dpt.concat(usm_arrays, axis=axis)
+
+    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
     res = dpnp_array._create_from_usm_ndarray(usm_res)
     if dtype is not None:
         res = res.astype(dtype, casting=casting, copy=False)
@@ -907,10 +910,11 @@ def expand_dims(a, axis):
 
     """
 
-    usm_array = dpnp.get_usm_ndarray(a)
-    return dpnp_array._create_from_usm_ndarray(
-        dpt.expand_dims(usm_array, axis=axis)
-    )
+    usm_a = dpnp.get_usm_ndarray(a)
+    usm_res = dpt.expand_dims(usm_a, axis=axis)
+
+    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
+    return dpnp_array._create_from_usm_ndarray(usm_res)
 
 
 def flip(m, axis=None):
@@ -1293,8 +1297,10 @@ def repeat(a, repeats, axis=None):
         usm_arr = dpnp.get_usm_ndarray(a.flatten())
     else:
         usm_arr = dpnp.get_usm_ndarray(a)
-    usm_arr = dpt.repeat(usm_arr, rep, axis=axis)
-    return dpnp_array._create_from_usm_ndarray(usm_arr)
+    usm_res = dpt.repeat(usm_arr, rep, axis=axis)
+
+    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
+    return dpnp_array._create_from_usm_ndarray(usm_res)
 
 
 def reshape(a, /, newshape, order="C", copy=None):
@@ -1369,9 +1375,11 @@ def reshape(a, /, newshape, order="C", copy=None):
     elif order not in "cfCF":
         raise ValueError(f"order must be one of 'C' or 'F' (got {order})")
 
-    usm_arr = dpnp.get_usm_ndarray(a)
-    usm_arr = dpt.reshape(usm_arr, shape=newshape, order=order, copy=copy)
-    return dpnp_array._create_from_usm_ndarray(usm_arr)
+    usm_a = dpnp.get_usm_ndarray(a)
+    usm_res = dpt.reshape(usm_a, shape=newshape, order=order, copy=copy)
+
+    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
+    return dpnp_array._create_from_usm_ndarray(usm_res)
 
 
 def result_type(*arrays_and_dtypes):
@@ -1478,10 +1486,12 @@ def roll(x, shift, axis=None):
     """
     if axis is None:
         return roll(x.reshape(-1), shift, 0).reshape(x.shape)
-    usm_array = dpnp.get_usm_ndarray(x)
-    return dpnp_array._create_from_usm_ndarray(
-        dpt.roll(usm_array, shift=shift, axis=axis)
-    )
+
+    usm_x = dpnp.get_usm_ndarray(x)
+    usm_res = dpt.roll(usm_x, shift=shift, axis=axis)
+
+    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
+    return dpnp_array._create_from_usm_ndarray(usm_res)
 
 
 def rollaxis(x, axis, start=0):
@@ -1628,10 +1638,11 @@ def squeeze(a, /, axis=None):
 
     """
 
-    usm_array = dpnp.get_usm_ndarray(a)
-    return dpnp_array._create_from_usm_ndarray(
-        dpt.squeeze(usm_array, axis=axis)
-    )
+    usm_a = dpnp.get_usm_ndarray(a)
+    usm_res = dpt.squeeze(usm_a, axis=axis)
+
+    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
+    return dpnp_array._create_from_usm_ndarray(usm_res)
 
 
 def stack(arrays, /, *, axis=0, out=None, dtype=None, casting="same_kind"):
@@ -1709,6 +1720,8 @@ def stack(arrays, /, *, axis=0, out=None, dtype=None, casting="same_kind"):
 
     usm_arrays = [dpnp.get_usm_ndarray(x) for x in arrays]
     usm_res = dpt.stack(usm_arrays, axis=axis)
+
+    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
     res = dpnp_array._create_from_usm_ndarray(usm_res)
     if dtype is not None:
         res = res.astype(dtype, casting=casting, copy=False)
@@ -1767,10 +1780,11 @@ def swapaxes(a, axis1, axis2):
 
     """
 
-    usm_array = dpnp.get_usm_ndarray(a)
-    return dpnp_array._create_from_usm_ndarray(
-        dpt.swapaxes(usm_array, axis1=axis1, axis2=axis2)
-    )
+    usm_a = dpnp.get_usm_ndarray(a)
+    usm_res = dpt.swapaxes(usm_a, axis1=axis1, axis2=axis2)
+
+    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
+    return dpnp_array._create_from_usm_ndarray(usm_res)
 
 
 # pylint: disable=invalid-name
@@ -1848,8 +1862,11 @@ def tile(A, reps):
 
     """
 
-    usm_array = dpnp.get_usm_ndarray(A)
-    return dpnp_array._create_from_usm_ndarray(dpt.tile(usm_array, reps))
+    usm_a = dpnp.get_usm_ndarray(A)
+    usm_res = dpt.tile(usm_a, reps)
+
+    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
+    return dpnp_array._create_from_usm_ndarray(usm_res)
 
 
 def transpose(a, axes=None):

From 553a02dd41baac3cb2eeb451e80aca0eac920678 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Wed, 26 Jun 2024 15:20:44 +0200
Subject: [PATCH 02/11] Update functions from the array creation container

---
 dpnp/dpnp_container.py           | 11 ++++++
 dpnp/dpnp_iface_arraycreation.py | 60 ++++++++++++++++++++++----------
 2 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py
index 5322df3324b4..0b658484005b 100644
--- a/dpnp/dpnp_container.py
+++ b/dpnp/dpnp_container.py
@@ -81,6 +81,7 @@ def arange(
         sycl_queue=sycl_queue_normalized,
     )
 
+    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
     return dpnp_array(array_obj.shape, buffer=array_obj)
 
 
@@ -133,6 +134,7 @@ def asarray(
         if array_obj is x1_obj and isinstance(x1, dpnp_array):
             return x1
 
+    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 
@@ -142,6 +144,7 @@ def copy(x1, /, *, order="K"):
         order = "K"
 
     array_obj = dpt.copy(dpnp.get_usm_ndarray(x1), order=order)
+    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
     return dpnp_array(array_obj.shape, buffer=array_obj, order="K")
 
 
@@ -203,6 +206,7 @@ def eye(
         usm_type=usm_type,
         sycl_queue=sycl_queue_normalized,
     )
+    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 
@@ -237,6 +241,7 @@ def full(
         usm_type=usm_type,
         sycl_queue=sycl_queue_normalized,
     )
+    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 
@@ -268,6 +273,7 @@ def linspace(
         sycl_queue=sycl_queue_normalized,
         endpoint=endpoint,
     )
+    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
     return dpnp_array(array_obj.shape, buffer=array_obj)
 
 
@@ -296,18 +302,21 @@ def ones(
         usm_type=usm_type,
         sycl_queue=sycl_queue_normalized,
     )
+    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 
 def tril(x1, /, *, k=0):
     """Creates `dpnp_array` as lower triangular part of an input array."""
     array_obj = dpt.tril(dpnp.get_usm_ndarray(x1), k=k)
+    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
     return dpnp_array(array_obj.shape, buffer=array_obj, order="K")
 
 
 def triu(x1, /, *, k=0):
     """Creates `dpnp_array` as upper triangular part of an input array."""
     array_obj = dpt.triu(dpnp.get_usm_ndarray(x1), k=k)
+    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
     return dpnp_array(array_obj.shape, buffer=array_obj, order="K")
 
 
@@ -336,4 +345,6 @@ def zeros(
         usm_type=usm_type,
         sycl_queue=sycl_queue_normalized,
     )
+    # TODO: uncomment once dpctl implements asynchronous call
+    # dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py
index 5cf63ea0fca0..f51d3297002e 100644
--- a/dpnp/dpnp_iface_arraycreation.py
+++ b/dpnp/dpnp_iface_arraycreation.py
@@ -40,6 +40,8 @@
 
 import operator
 
+import dpctl.tensor as dpt
+import dpctl.utils as dpu
 import numpy
 
 import dpnp
@@ -51,6 +53,10 @@
     dpnp_logspace,
     dpnp_nd_grid,
 )
+from .dpnp_array import dpnp_array
+
+# pylint: disable=no-name-in-module
+from .dpnp_utils import get_usm_allocations, map_dtype_to_device
 
 __all__ = [
     "arange",
@@ -2720,21 +2726,30 @@ def meshgrid(*xi, copy=True, sparse=False, indexing="xy"):
             "Unrecognized indexing keyword value, expecting 'xy' or 'ij'."
         )
 
+    if ndim < 1:
+        return []
+
     s0 = (1,) * ndim
     output = [
-        dpnp.reshape(x, s0[:i] + (-1,) + s0[i + 1 :]) for i, x in enumerate(xi)
+        dpt.reshape(dpnp.get_usm_ndarray(x), s0[:i] + (-1,) + s0[i + 1 :])
+        for i, x in enumerate(xi)
     ]
 
+    # input arrays must be allocated on the same queue
+    _, exec_q = get_usm_allocations(output)
+
     if indexing == "xy" and ndim > 1:
-        output[0] = output[0].reshape((1, -1) + s0[2:])
-        output[1] = output[1].reshape((-1, 1) + s0[2:])
+        output[0] = dpt.reshape(output[0], (1, -1) + s0[2:])
+        output[1] = dpt.reshape(output[1], (-1, 1) + s0[2:])
 
     if not sparse:
-        output = dpnp.broadcast_arrays(*output)
+        output = dpt.broadcast_arrays(*output)
 
     if copy:
-        output = [x.copy() for x in output]
+        output = [dpt.copy(x) for x in output]
 
+    dpu.SequentialOrderManager[exec_q].wait()
+    output = [dpnp_array._create_from_usm_ndarray(x) for x in output]
     return output
 
 
@@ -3261,7 +3276,10 @@ def tri(
 
     _dtype = dpnp.default_float_type() if dtype in (dpnp.float, None) else dtype
 
-    m = dpnp.ones(
+    if usm_type is None:
+        usm_type = "device"
+
+    m = dpt.ones(
         (N, M),
         dtype=_dtype,
         device=device,
@@ -3469,28 +3487,34 @@ def vander(
             [125,  25,   5,   1]]), Device(level_zero:gpu:0), 'host')
     """
 
-    x = dpnp.asarray(x, device=device, usm_type=usm_type, sycl_queue=sycl_queue)
+    if dpnp.is_supported_array_type(x):
+        x = dpnp.get_usm_ndarray(x)
+    usm_x = dpt.asarray(
+        x, device=device, usm_type=usm_type, sycl_queue=sycl_queue
+    )
+
+    x_sycl_queue = usm_x.sycl_queue
+    x_usm_type = usm_x.usm_type
 
     if N is not None and not isinstance(N, int):
         raise TypeError(f"An integer is required, but got {type(N)}")
 
-    if x.ndim != 1:
+    if usm_x.ndim != 1:
         raise ValueError("`x` must be a one-dimensional array or sequence.")
 
     if N is None:
-        N = x.size
+        N = usm_x.size
+
+    _dtype = numpy.promote_types(usm_x.dtype, int)
+    _dtype = map_dtype_to_device(_dtype, x_sycl_queue.sycl_device)
+    m = dpnp.empty_like(usm_x, shape=(usm_x.size, N), dtype=_dtype)
 
-    _dtype = int if x.dtype == bool else x.dtype
-    m = empty(
-        (x.size, N),
-        dtype=_dtype,
-        usm_type=x.usm_type,
-        sycl_queue=x.sycl_queue,
-    )
     tmp = m[:, ::-1] if not increasing else m
     dpnp.power(
-        x.reshape(-1, 1),
-        dpnp.arange(N, dtype=_dtype, sycl_queue=x.sycl_queue),
+        dpt.reshape(usm_x, (-1, 1)),
+        dpt.arange(
+            N, dtype=_dtype, usm_type=x_usm_type, sycl_queue=x_sycl_queue
+        ),
         out=tmp,
     )
     return m

From 5e5f895485659ed99c909ba484332042ed2974dc Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Wed, 26 Jun 2024 16:31:36 +0200
Subject: [PATCH 03/11] Update dpnp array methods

---
 dpnp/dpnp_array.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index fd2d06f74285..c25bba998b31 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -25,6 +25,7 @@
 # *****************************************************************************
 
 import dpctl.tensor as dpt
+import dpctl.utils as dpu
 
 import dpnp
 
@@ -258,6 +259,8 @@ def __getitem__(self, key):
         res = self.__new__(dpnp_array)
         res._array_obj = item
 
+        if self._array_obj.usm_data is not res._array_obj.usm_data:
+            dpu.SequentialOrderManager[self.sycl_queue].wait()
         return res
 
     def __gt__(self, other):
@@ -454,6 +457,7 @@ def __setitem__(self, key, val):
             val = val.get_array()
 
         self._array_obj.__setitem__(key, val)
+        dpu.SequentialOrderManager[self.sycl_queue].wait()
 
     # '__setstate__',
     # '__sizeof__',

From 51746d19ed52cf9a2c1cd24cfa3548837be2d540 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Fri, 28 Jun 2024 15:36:10 +0200
Subject: [PATCH 04/11] Implement backward compatible solution

---
 dpnp/dpnp_array.py               |  5 ++---
 dpnp/dpnp_container.py           | 20 ++++++++++----------
 dpnp/dpnp_iface.py               | 15 +++++++++++++++
 dpnp/dpnp_iface_arraycreation.py |  5 ++---
 dpnp/dpnp_iface_manipulation.py  | 21 +++++++++++----------
 5 files changed, 40 insertions(+), 26 deletions(-)

diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index c25bba998b31..d9936872a890 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -25,7 +25,6 @@
 # *****************************************************************************
 
 import dpctl.tensor as dpt
-import dpctl.utils as dpu
 
 import dpnp
 
@@ -260,7 +259,7 @@ def __getitem__(self, key):
         res._array_obj = item
 
         if self._array_obj.usm_data is not res._array_obj.usm_data:
-            dpu.SequentialOrderManager[self.sycl_queue].wait()
+            dpnp.synchronize_array_data(self)
         return res
 
     def __gt__(self, other):
@@ -457,7 +456,7 @@ def __setitem__(self, key, val):
             val = val.get_array()
 
         self._array_obj.__setitem__(key, val)
-        dpu.SequentialOrderManager[self.sycl_queue].wait()
+        dpnp.synchronize_array_data(self)
 
     # '__setstate__',
     # '__sizeof__',
diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py
index 0b658484005b..5b17f9e87151 100644
--- a/dpnp/dpnp_container.py
+++ b/dpnp/dpnp_container.py
@@ -81,7 +81,7 @@ def arange(
         sycl_queue=sycl_queue_normalized,
     )
 
-    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
+    dpnp.synchronize_array_data(array_obj)
     return dpnp_array(array_obj.shape, buffer=array_obj)
 
 
@@ -134,7 +134,7 @@ def asarray(
         if array_obj is x1_obj and isinstance(x1, dpnp_array):
             return x1
 
-    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
+    dpnp.synchronize_array_data(array_obj)
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 
@@ -144,7 +144,7 @@ def copy(x1, /, *, order="K"):
         order = "K"
 
     array_obj = dpt.copy(dpnp.get_usm_ndarray(x1), order=order)
-    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
+    dpnp.synchronize_array_data(array_obj)
     return dpnp_array(array_obj.shape, buffer=array_obj, order="K")
 
 
@@ -206,7 +206,7 @@ def eye(
         usm_type=usm_type,
         sycl_queue=sycl_queue_normalized,
     )
-    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
+    dpnp.synchronize_array_data(array_obj)
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 
@@ -241,7 +241,7 @@ def full(
         usm_type=usm_type,
         sycl_queue=sycl_queue_normalized,
     )
-    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
+    dpnp.synchronize_array_data(array_obj)
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 
@@ -273,7 +273,7 @@ def linspace(
         sycl_queue=sycl_queue_normalized,
         endpoint=endpoint,
     )
-    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
+    dpnp.synchronize_array_data(array_obj)
     return dpnp_array(array_obj.shape, buffer=array_obj)
 
 
@@ -302,21 +302,21 @@ def ones(
         usm_type=usm_type,
         sycl_queue=sycl_queue_normalized,
     )
-    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
+    dpnp.synchronize_array_data(array_obj)
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 
 def tril(x1, /, *, k=0):
     """Creates `dpnp_array` as lower triangular part of an input array."""
     array_obj = dpt.tril(dpnp.get_usm_ndarray(x1), k=k)
-    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
+    dpnp.synchronize_array_data(array_obj)
     return dpnp_array(array_obj.shape, buffer=array_obj, order="K")
 
 
 def triu(x1, /, *, k=0):
     """Creates `dpnp_array` as upper triangular part of an input array."""
     array_obj = dpt.triu(dpnp.get_usm_ndarray(x1), k=k)
-    dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
+    dpnp.synchronize_array_data(array_obj)
     return dpnp_array(array_obj.shape, buffer=array_obj, order="K")
 
 
@@ -346,5 +346,5 @@ def zeros(
         sycl_queue=sycl_queue_normalized,
     )
     # TODO: uncomment once dpctl implements asynchronous call
-    # dpu.SequentialOrderManager[array_obj.sycl_queue].wait()
+    # dpnp.synchronize_array_data(array_obj)
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index 49e7b41c01c9..8e5304cd9eb0 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -42,6 +42,7 @@
 
 import dpctl
 import dpctl.tensor as dpt
+import dpctl.utils as dpu
 import numpy
 from dpctl.tensor._device import normalize_queue_device
 
@@ -69,6 +70,7 @@
     "get_usm_ndarray_or_scalar",
     "is_supported_array_or_scalar",
     "is_supported_array_type",
+    "synchronize_array_data",
 ]
 
 from dpnp import float64, isscalar
@@ -699,3 +701,16 @@ def is_supported_array_type(a):
     """
 
     return isinstance(a, (dpnp_array, dpt.usm_ndarray))
+
+
+def synchronize_array_data(a):
+    """
+    The dpctl interface was reworked to execute operations asynchronously.
+    This function makes a synchronization call to ensure the array data is
+    valid before returning from a dpnp interface function.
+
+    """
+
+    if hasattr(dpu, "SequentialOrderManager"):
+        check_supported_arrays_type(a)
+        dpu.SequentialOrderManager[a.sycl_queue].wait()
diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py
index f51d3297002e..e80f50fea72f 100644
--- a/dpnp/dpnp_iface_arraycreation.py
+++ b/dpnp/dpnp_iface_arraycreation.py
@@ -41,7 +41,6 @@
 import operator
 
 import dpctl.tensor as dpt
-import dpctl.utils as dpu
 import numpy
 
 import dpnp
@@ -2736,7 +2735,7 @@ def meshgrid(*xi, copy=True, sparse=False, indexing="xy"):
     ]
 
     # input arrays must be allocated on the same queue
-    _, exec_q = get_usm_allocations(output)
+    _, _ = get_usm_allocations(output)
 
     if indexing == "xy" and ndim > 1:
         output[0] = dpt.reshape(output[0], (1, -1) + s0[2:])
@@ -2748,7 +2747,7 @@ def meshgrid(*xi, copy=True, sparse=False, indexing="xy"):
     if copy:
         output = [dpt.copy(x) for x in output]
 
-    dpu.SequentialOrderManager[exec_q].wait()
+    dpnp.synchronize_array_data(output[0])
     output = [dpnp_array._create_from_usm_ndarray(x) for x in output]
     return output
 
diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py
index 28165aab4ba3..a4b7352d4e64 100644
--- a/dpnp/dpnp_iface_manipulation.py
+++ b/dpnp/dpnp_iface_manipulation.py
@@ -39,7 +39,6 @@
 
 
 import dpctl.tensor as dpt
-import dpctl.utils as dpu
 import numpy
 from numpy.core.numeric import normalize_axis_index
 
@@ -670,13 +669,14 @@ def concatenate(
     usm_arrays = [dpnp.get_usm_ndarray(x) for x in arrays]
     usm_res = dpt.concat(usm_arrays, axis=axis)
 
-    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
     res = dpnp_array._create_from_usm_ndarray(usm_res)
     if dtype is not None:
         res = res.astype(dtype, casting=casting, copy=False)
     elif out is not None:
         dpnp.copyto(out, res, casting=casting)
         return out
+
+    dpnp.synchronize_array_data(res)
     return res
 
 
@@ -913,7 +913,7 @@ def expand_dims(a, axis):
     usm_a = dpnp.get_usm_ndarray(a)
     usm_res = dpt.expand_dims(usm_a, axis=axis)
 
-    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
+    dpnp.synchronize_array_data(usm_res)
     return dpnp_array._create_from_usm_ndarray(usm_res)
 
 
@@ -1304,7 +1304,7 @@ def repeat(a, repeats, axis=None):
     usm_arr = dpnp.get_usm_ndarray(a)
     usm_res = dpt.repeat(usm_arr, repeats, axis=axis)
 
-    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
+    dpnp.synchronize_array_data(usm_res)
     return dpnp_array._create_from_usm_ndarray(usm_res)
 
 
@@ -1383,7 +1383,7 @@ def reshape(a, /, newshape, order="C", copy=None):
     usm_a = dpnp.get_usm_ndarray(a)
     usm_res = dpt.reshape(usm_a, shape=newshape, order=order, copy=copy)
 
-    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
+    dpnp.synchronize_array_data(usm_res)
     return dpnp_array._create_from_usm_ndarray(usm_res)
 
 
@@ -1495,7 +1495,7 @@ def roll(x, shift, axis=None):
     usm_x = dpnp.get_usm_ndarray(x)
     usm_res = dpt.roll(usm_x, shift=shift, axis=axis)
 
-    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
+    dpnp.synchronize_array_data(usm_res)
     return dpnp_array._create_from_usm_ndarray(usm_res)
 
 
@@ -1646,7 +1646,7 @@ def squeeze(a, /, axis=None):
     usm_a = dpnp.get_usm_ndarray(a)
     usm_res = dpt.squeeze(usm_a, axis=axis)
 
-    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
+    dpnp.synchronize_array_data(usm_res)
     return dpnp_array._create_from_usm_ndarray(usm_res)
 
 
@@ -1726,13 +1726,14 @@ def stack(arrays, /, *, axis=0, out=None, dtype=None, casting="same_kind"):
     usm_arrays = [dpnp.get_usm_ndarray(x) for x in arrays]
     usm_res = dpt.stack(usm_arrays, axis=axis)
 
-    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
     res = dpnp_array._create_from_usm_ndarray(usm_res)
     if dtype is not None:
         res = res.astype(dtype, casting=casting, copy=False)
     elif out is not None:
         dpnp.copyto(out, res, casting=casting)
         return out
+
+    dpnp.synchronize_array_data(res)
     return res
 
 
@@ -1788,7 +1789,7 @@ def swapaxes(a, axis1, axis2):
     usm_a = dpnp.get_usm_ndarray(a)
     usm_res = dpt.swapaxes(usm_a, axis1=axis1, axis2=axis2)
 
-    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
+    dpnp.synchronize_array_data(usm_res)
     return dpnp_array._create_from_usm_ndarray(usm_res)
 
 
@@ -1870,7 +1871,7 @@ def tile(A, reps):
     usm_a = dpnp.get_usm_ndarray(A)
     usm_res = dpt.tile(usm_a, reps)
 
-    dpu.SequentialOrderManager[usm_res.sycl_queue].wait()
+    dpnp.synchronize_array_data(usm_res)
     return dpnp_array._create_from_usm_ndarray(usm_res)
 
 

From e679694303592b5e5481ae657836123455819fdd Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Fri, 28 Jun 2024 16:52:08 +0200
Subject: [PATCH 05/11] dpnp.meshgrid has to follow CFD and prohibit input
 arrays allocated on different SYCL queues

---
 tests/test_sycl_queue.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index 378ecaf9b197..f7c70320dbfb 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -373,18 +373,13 @@ def test_array_creation_load_txt(device):
 
 
 @pytest.mark.parametrize(
-    "device_x",
-    valid_devices,
-    ids=[device.filter_string for device in valid_devices],
-)
-@pytest.mark.parametrize(
-    "device_y",
+    "device",
     valid_devices,
     ids=[device.filter_string for device in valid_devices],
 )
-def test_meshgrid(device_x, device_y):
-    x = dpnp.arange(100, device=device_x)
-    y = dpnp.arange(100, device=device_y)
+def test_meshgrid(device):
+    x = dpnp.arange(100, device=device)
+    y = dpnp.arange(100, device=device)
     z = dpnp.meshgrid(x, y)
     assert_sycl_queue_equal(z[0].sycl_queue, x.sycl_queue)
     assert_sycl_queue_equal(z[1].sycl_queue, y.sycl_queue)

From 6f33b3d40ccb01d47ba8424aab9afa1fd3a48ad5 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Mon, 1 Jul 2024 14:57:14 +0200
Subject: [PATCH 06/11] updated linspace, logspace and geomspace functions

---
 dpnp/dpnp_algo/dpnp_arraycreation.py | 107 ++++++++++++++-------------
 dpnp/dpnp_iface_arraycreation.py     |  22 +++++-
 2 files changed, 73 insertions(+), 56 deletions(-)

diff --git a/dpnp/dpnp_algo/dpnp_arraycreation.py b/dpnp/dpnp_algo/dpnp_arraycreation.py
index 83cd9da4acf2..aeb77ae34c7e 100644
--- a/dpnp/dpnp_algo/dpnp_arraycreation.py
+++ b/dpnp/dpnp_algo/dpnp_arraycreation.py
@@ -1,12 +1,13 @@
 import math
 import operator
 
+import dpctl.tensor as dpt
 import dpctl.utils as dpu
 import numpy
 
 import dpnp
-import dpnp.dpnp_container as dpnp_container
 import dpnp.dpnp_utils as utils
+from dpnp.dpnp_array import dpnp_array
 
 __all__ = [
     "dpnp_geomspace",
@@ -16,6 +17,12 @@
 ]
 
 
+def _as_usm_ndarray(a, usm_type, sycl_queue):
+    if isinstance(a, dpnp_array):
+        return a.get_array()
+    return dpt.asarray(a, usm_type=usm_type, sycl_queue=sycl_queue)
+
+
 def dpnp_geomspace(
     start,
     stop,
@@ -40,14 +47,8 @@ def dpnp_geomspace(
     else:
         _usm_type = usm_type
 
-    if not dpnp.is_supported_array_type(start):
-        start = dpnp.asarray(
-            start, usm_type=_usm_type, sycl_queue=sycl_queue_normalized
-        )
-    if not dpnp.is_supported_array_type(stop):
-        stop = dpnp.asarray(
-            stop, usm_type=_usm_type, sycl_queue=sycl_queue_normalized
-        )
+    start = _as_usm_ndarray(start, _usm_type, sycl_queue_normalized)
+    stop = _as_usm_ndarray(stop, _usm_type, sycl_queue_normalized)
 
     dt = numpy.result_type(start, stop, float(num))
     dt = utils.map_dtype_to_device(dt, sycl_queue_normalized.sycl_device)
@@ -57,8 +58,8 @@ def dpnp_geomspace(
     if dpnp.any(start == 0) or dpnp.any(stop == 0):
         raise ValueError("Geometric sequence cannot include zero")
 
-    out_sign = dpnp.ones(
-        dpnp.broadcast_arrays(start, stop)[0].shape,
+    out_sign = dpt.ones(
+        dpt.broadcast_arrays(start, stop)[0].shape,
         dtype=dt,
         usm_type=_usm_type,
         sycl_queue=sycl_queue_normalized,
@@ -72,15 +73,15 @@ def dpnp_geomspace(
             stop[all_imag] = stop[all_imag].imag
             out_sign[all_imag] = 1j
 
-    both_negative = (dpnp.sign(start) == -1) & (dpnp.sign(stop) == -1)
+    both_negative = (dpt.sign(start) == -1) & (dpt.sign(stop) == -1)
     if dpnp.any(both_negative):
-        dpnp.negative(start[both_negative], out=start[both_negative])
-        dpnp.negative(stop[both_negative], out=stop[both_negative])
-        dpnp.negative(out_sign[both_negative], out=out_sign[both_negative])
+        dpt.negative(start[both_negative], out=start[both_negative])
+        dpt.negative(stop[both_negative], out=stop[both_negative])
+        dpt.negative(out_sign[both_negative], out=out_sign[both_negative])
 
-    log_start = dpnp.log10(start)
-    log_stop = dpnp.log10(stop)
-    result = dpnp_logspace(
+    log_start = dpt.log10(start)
+    log_stop = dpt.log10(stop)
+    res = dpnp_logspace(
         log_start,
         log_stop,
         num=num,
@@ -92,16 +93,16 @@ def dpnp_geomspace(
     )
 
     if num > 0:
-        result[0] = start
+        res[0] = start
         if num > 1 and endpoint:
-            result[-1] = stop
+            res[-1] = stop
 
-    result = out_sign * result
+    res = out_sign * res
 
     if axis != 0:
-        result = dpnp.moveaxis(result, 0, axis)
+        res = dpt.moveaxis(res, 0, axis)
 
-    return result.astype(dtype, copy=False)
+    return dpt.astype(res, dtype, copy=False)
 
 
 def dpnp_linspace(
@@ -129,14 +130,11 @@ def dpnp_linspace(
     else:
         _usm_type = usm_type
 
-    if not hasattr(start, "dtype") and not dpnp.isscalar(start):
-        start = dpnp.asarray(
-            start, usm_type=_usm_type, sycl_queue=sycl_queue_normalized
-        )
-    if not hasattr(stop, "dtype") and not dpnp.isscalar(stop):
-        stop = dpnp.asarray(
-            stop, usm_type=_usm_type, sycl_queue=sycl_queue_normalized
-        )
+    if not dpnp.isscalar(start):
+        start = _as_usm_ndarray(start, _usm_type, sycl_queue_normalized)
+
+    if not dpnp.isscalar(stop):
+        stop = _as_usm_ndarray(stop, _usm_type, sycl_queue_normalized)
 
     dt = numpy.result_type(start, stop, float(num))
     dt = utils.map_dtype_to_device(dt, sycl_queue_normalized.sycl_device)
@@ -155,7 +153,7 @@ def dpnp_linspace(
 
     if dpnp.isscalar(start) and dpnp.isscalar(stop):
         # Call linspace() function for scalars.
-        res = dpnp_container.linspace(
+        usm_res = dpt.linspace(
             start,
             stop,
             num,
@@ -167,17 +165,17 @@ def dpnp_linspace(
         if retstep is True and step_nan is False:
             step = (stop - start) / step_num
     else:
-        _start = dpnp.asarray(
+        usm_start = dpt.asarray(
             start,
             dtype=dt,
             usm_type=_usm_type,
             sycl_queue=sycl_queue_normalized,
         )
-        _stop = dpnp.asarray(
+        usm_stop = dpt.asarray(
             stop, dtype=dt, usm_type=_usm_type, sycl_queue=sycl_queue_normalized
         )
 
-        res = dpnp_container.arange(
+        usm_res = dpt.arange(
             0,
             stop=num,
             step=1,
@@ -187,28 +185,27 @@ def dpnp_linspace(
         )
 
         if step_nan is False:
-            step = (_stop - _start) / step_num
-            res = res.reshape((-1,) + (1,) * step.ndim)
-            res = res * step + _start
+            step = (usm_stop - usm_start) / step_num
+            usm_res = dpt.reshape(usm_res, (-1,) + (1,) * step.ndim, copy=False)
+            usm_res = usm_res * step
+            usm_res += usm_start
 
         if endpoint and num > 1:
-            res[-1] = dpnp_container.full(step.shape, _stop)
+            usm_res[-1] = dpt.full(step.shape, usm_stop)
 
     if axis != 0:
-        res = dpnp.moveaxis(res, 0, axis)
+        usm_res = dpt.moveaxis(usm_res, 0, axis)
 
     if numpy.issubdtype(dtype, dpnp.integer):
-        dpnp.floor(res, out=res)
-
-    res = res.astype(dtype, copy=False)
+        dpt.floor(usm_res, out=usm_res)
 
+    res = dpt.astype(usm_res, dtype, copy=False)
     if retstep is True:
         if dpnp.isscalar(step):
-            step = dpnp.asarray(
+            step = dpt.asarray(
                 step, usm_type=res.usm_type, sycl_queue=res.sycl_queue
             )
         return (res, step)
-
     return res
 
 
@@ -239,12 +236,15 @@ def dpnp_logspace(
             usm_type = "device" if usm_type_alloc is None else usm_type_alloc
         else:
             usm_type = usm_type
-        start = dpnp.asarray(start, usm_type=usm_type, sycl_queue=sycl_queue)
-        stop = dpnp.asarray(stop, usm_type=usm_type, sycl_queue=sycl_queue)
-        base = dpnp.asarray(base, usm_type=usm_type, sycl_queue=sycl_queue)
-        [start, stop, base] = dpnp.broadcast_arrays(start, stop, base)
-        base = dpnp.expand_dims(base, axis=axis)
 
+        start = _as_usm_ndarray(start, usm_type, sycl_queue)
+        stop = _as_usm_ndarray(stop, usm_type, sycl_queue)
+        base = _as_usm_ndarray(base, usm_type, sycl_queue)
+
+        [start, stop, base] = dpt.broadcast_arrays(start, stop, base)
+        base = dpt.expand_dims(base, axis=axis)
+
+    # res is assumed not to be a tuple, because retstep is False
     res = dpnp_linspace(
         start,
         stop,
@@ -256,9 +256,10 @@ def dpnp_logspace(
         axis=axis,
     )
 
-    if dtype is None:
-        return dpnp.power(base, res)
-    return dpnp.power(base, res).astype(dtype, copy=False)
+    dpt.pow(base, res, out=res)
+    if dtype is not None:
+        res = dpt.astype(res, dtype, copy=False)
+    return res
 
 
 class dpnp_nd_grid:
diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py
index e80f50fea72f..0f74906d6b80 100644
--- a/dpnp/dpnp_iface_arraycreation.py
+++ b/dpnp/dpnp_iface_arraycreation.py
@@ -2188,7 +2188,7 @@ def geomspace(
 
     """
 
-    return dpnp_geomspace(
+    res = dpnp_geomspace(
         start,
         stop,
         num,
@@ -2200,6 +2200,10 @@ def geomspace(
         axis=axis,
     )
 
+    dpnp.synchronize_array_data(res)
+    res = dpnp_array._create_from_usm_ndarray(res)
+    return res
+
 
 def identity(
     n,
@@ -2407,7 +2411,7 @@ def linspace(
 
     """
 
-    return dpnp_linspace(
+    res = dpnp_linspace(
         start,
         stop,
         num,
@@ -2420,6 +2424,14 @@ def linspace(
         axis=axis,
     )
 
+    if isinstance(res, tuple):  # (result, step) is returning
+        dpnp.synchronize_array_data(res[0])
+        res = tuple(dpnp_array._create_from_usm_ndarray(x) for x in res)
+    else:
+        dpnp.synchronize_array_data(res)
+        res = dpnp_array._create_from_usm_ndarray(res)
+    return res
+
 
 def loadtxt(
     fname,
@@ -2634,7 +2646,7 @@ def logspace(
 
     """
 
-    return dpnp_logspace(
+    res = dpnp_logspace(
         start,
         stop,
         num=num,
@@ -2647,6 +2659,10 @@ def logspace(
         axis=axis,
     )
 
+    dpnp.synchronize_array_data(res)
+    res = dpnp_array._create_from_usm_ndarray(res)
+    return res
+
 
 # pylint: disable=redefined-outer-name
 def meshgrid(*xi, copy=True, sparse=False, indexing="xy"):

From c207efc80e8ed86d23f1d1cf7a34cc72ef7b9bdd Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Mon, 1 Jul 2024 15:55:20 +0200
Subject: [PATCH 07/11] Updated elementwise functions and astype

---
 dpnp/dpnp_algo/dpnp_elementwise_common.py | 125 +++++++++++++---------
 dpnp/dpnp_iface.py                        |   4 +-
 2 files changed, 76 insertions(+), 53 deletions(-)

diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py
index 374981a63031..b13ea56bc329 100644
--- a/dpnp/dpnp_algo/dpnp_elementwise_common.py
+++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py
@@ -24,6 +24,7 @@
 # THE POSSIBILITY OF SUCH DAMAGE.
 # *****************************************************************************
 
+import dpctl.tensor as dpt
 import numpy
 from dpctl.tensor._elementwise_common import (
     BinaryElementwiseFunc,
@@ -161,24 +162,27 @@ def __call__(
                 f"Requested function={self.name_} only takes `out` or `dtype`"
                 "as an argument, but both were provided."
             )
+
+        if order is None:
+            order = "K"
+        elif order in "afkcAFKC":
+            order = order.upper()
         else:
-            if order is None:
-                order = "K"
-            elif order in "afkcAFKC":
-                order = order.upper()
-            else:
-                raise ValueError(
-                    "order must be one of 'C', 'F', 'A', or 'K' "
-                    f"(got '{order}')"
-                )
-            if dtype is not None:
-                x = dpnp.astype(x, dtype=dtype, copy=False)
-            x_usm = dpnp.get_usm_ndarray(x)
-            out_usm = None if out is None else dpnp.get_usm_ndarray(out)
-            res_usm = super().__call__(x_usm, out=out_usm, order=order)
-            if out is not None and isinstance(out, dpnp_array):
-                return out
-            return dpnp_array._create_from_usm_ndarray(res_usm)
+            raise ValueError(
+                "order must be one of 'C', 'F', 'A', or 'K' " f"(got '{order}')"
+            )
+
+        x_usm = dpnp.get_usm_ndarray(x)
+        if dtype is not None:
+            x_usm = dpt.astype(x_usm, dtype, copy=False)
+
+        out_usm = None if out is None else dpnp.get_usm_ndarray(out)
+        res_usm = super().__call__(x_usm, out=out_usm, order=order)
+
+        dpnp.synchronize_array_data(res_usm)
+        if out is not None and isinstance(out, dpnp_array):
+            return out
+        return dpnp_array._create_from_usm_ndarray(res_usm)
 
 
 class DPNPBinaryFunc(BinaryElementwiseFunc):
@@ -311,35 +315,47 @@ def __call__(
                 f"Requested function={self.name_} only takes `out` or `dtype`"
                 "as an argument, but both were provided."
             )
+
+        if order is None:
+            order = "K"
+        elif order in "afkcAFKC":
+            order = order.upper()
         else:
-            if order is None:
-                order = "K"
-            elif order in "afkcAFKC":
-                order = order.upper()
-            else:
-                raise ValueError(
-                    "order must be one of 'C', 'F', 'A', or 'K' "
-                    f"(got '{order}')"
+            raise ValueError(
+                f"order must be one of 'C', 'F', 'A', or 'K' (got '{order}')"
+            )
+
+        x1_usm = dpnp.get_usm_ndarray_or_scalar(x1)
+        x2_usm = dpnp.get_usm_ndarray_or_scalar(x2)
+
+        if dtype is not None:
+            if dpnp.isscalar(x1):
+                x1_usm = dpt.asarray(
+                    x1,
+                    dtype=dtype,
+                    sycl_queue=x2.sycl_queue,
+                    usm_type=x2.usm_type,
                 )
-            if dtype is not None:
-                if dpnp.isscalar(x1):
-                    x1 = dpnp.asarray(x1, dtype=dtype)
-                    x2 = dpnp.astype(x2, dtype=dtype, copy=False)
-                elif dpnp.isscalar(x2):
-                    x1 = dpnp.astype(x1, dtype=dtype, copy=False)
-                    x2 = dpnp.asarray(x2, dtype=dtype)
-                else:
-                    x1 = dpnp.astype(x1, dtype=dtype, copy=False)
-                    x2 = dpnp.astype(x2, dtype=dtype, copy=False)
-
-            x1_usm = dpnp.get_usm_ndarray_or_scalar(x1)
-            x2_usm = dpnp.get_usm_ndarray_or_scalar(x2)
+                x2_usm = dpt.astype(x2_usm, dtype, copy=False)
+            elif dpnp.isscalar(x2):
+                x1_usm = dpt.astype(x1_usm, dtype, copy=False)
+                x2_usm = dpt.asarray(
+                    x2,
+                    dtype=dtype,
+                    sycl_queue=x1.sycl_queue,
+                    usm_type=x1.usm_type,
+                )
+            else:
+                x1_usm = dpt.astype(x1_usm, dtype, copy=False)
+                x2_usm = dpt.astype(x2_usm, dtype, copy=False)
 
-            out_usm = None if out is None else dpnp.get_usm_ndarray(out)
-            res_usm = super().__call__(x1_usm, x2_usm, out=out_usm, order=order)
-            if out is not None and isinstance(out, dpnp_array):
-                return out
-            return dpnp_array._create_from_usm_ndarray(res_usm)
+        out_usm = None if out is None else dpnp.get_usm_ndarray(out)
+        res_usm = super().__call__(x1_usm, x2_usm, out=out_usm, order=order)
+
+        dpnp.synchronize_array_data(res_usm)
+        if out is not None and isinstance(out, dpnp_array):
+            return out
+        return dpnp_array._create_from_usm_ndarray(res_usm)
 
     def outer(
         self,
@@ -463,7 +479,7 @@ def __init__(
     def __call__(self, x, deg=False):
         res = super().__call__(x)
         if deg is True:
-            res = res * (180 / dpnp.pi)
+            res *= 180 / dpnp.pi
         return res
 
 
@@ -513,14 +529,21 @@ def __init__(
 
     def __call__(self, x, decimals=0, out=None, dtype=None):
         if decimals != 0:
-            if dpnp.issubdtype(x.dtype, dpnp.integer) and dtype is None:
-                dtype = x.dtype
-            res = dpnp.true_divide(
-                dpnp.rint(x * 10**decimals, out=out), 10**decimals, out=out
-            )
+            x_usm = dpnp.get_usm_ndarray(x)
+            if dpnp.issubdtype(x_usm.dtype, dpnp.integer) and dtype is None:
+                dtype = x_usm.dtype
+
+            out_usm = None if out is None else dpnp.get_usm_ndarray(out)
+            x_usm = dpt.round(x_usm * 10**decimals, out=out_usm)
+            res_usm = dpt.divide(x_usm, 10**decimals, out=out_usm)
+
             if dtype is not None:
-                res = res.astype(dtype)
-            return res
+                res_usm = dpt.astype(res_usm, dtype, copy=False)
+
+            dpnp.synchronize_array_data(res_usm)
+            if out is not None and isinstance(out, dpnp_array):
+                return out
+            return dpnp_array._create_from_usm_ndarray(res_usm)
         else:
             return super().__call__(x, out=out, dtype=dtype)
 
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index 8e5304cd9eb0..b3103869e8d3 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -240,10 +240,10 @@ def astype(x1, dtype, order="K", casting="unsafe", copy=True, device=None):
         x1_obj, dtype, order=order, casting=casting, copy=copy, device=device
     )
 
-    # return x1 if dpctl returns a zero copy of x1_obj
+    dpnp.synchronize_array_data(x1)
     if array_obj is x1_obj and isinstance(x1, dpnp_array):
+        # return x1 if dpctl returns a zero copy of x1_obj
         return x1
-
     return dpnp_array._create_from_usm_ndarray(array_obj)
 
 

From 2ef8f667a7383a2c79da2dfb9f7e654efaa7a829 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Mon, 1 Jul 2024 17:08:11 +0200
Subject: [PATCH 08/11] Updated counting and histogram functions

---
 dpnp/dpnp_iface_counting.py   |  7 ++--
 dpnp/dpnp_iface_histograms.py | 66 +++++++++++++++++++++++++----------
 2 files changed, 52 insertions(+), 21 deletions(-)

diff --git a/dpnp/dpnp_iface_counting.py b/dpnp/dpnp_iface_counting.py
index 8a90601ce8fe..515cad08a06b 100644
--- a/dpnp/dpnp_iface_counting.py
+++ b/dpnp/dpnp_iface_counting.py
@@ -37,6 +37,8 @@
 
 """
 
+import dpctl.tensor as dpt
+
 import dpnp
 
 __all__ = ["count_nonzero"]
@@ -87,5 +89,6 @@ def count_nonzero(a, axis=None, *, keepdims=False):
 
     # TODO: might be improved by implementing an extension
     # with `count_nonzero` kernel
-    a = dpnp.astype(a, dpnp.bool, copy=False)
-    return a.sum(axis=axis, dtype=dpnp.intp, keepdims=keepdims)
+    usm_a = dpnp.get_usm_ndarray(a)
+    usm_a = dpt.astype(usm_a, dpnp.bool, copy=False)
+    return dpnp.sum(usm_a, axis=axis, dtype=dpnp.intp, keepdims=keepdims)
diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py
index 1a1b4daf740d..bf3164ab9b6c 100644
--- a/dpnp/dpnp_iface_histograms.py
+++ b/dpnp/dpnp_iface_histograms.py
@@ -40,11 +40,17 @@
 import operator
 import warnings
 
+import dpctl.tensor as dpt
 import dpctl.utils as dpu
 import numpy
 
 import dpnp
 
+from .dpnp_algo.dpnp_arraycreation import (
+    dpnp_linspace,
+)
+from .dpnp_array import dpnp_array
+
 __all__ = [
     "digitize",
     "histogram",
@@ -60,7 +66,7 @@ def _ravel_check_a_and_weights(a, weights):
     """Check input `a` and `weights` arrays, and ravel both."""
 
     # ensure that `a` array has supported type
-    dpnp.check_supported_arrays_type(a)
+    a = dpnp.get_usm_ndarray(a)
     usm_type = a.usm_type
 
     # ensure that the array is a "subtractable" dtype
@@ -71,11 +77,11 @@ def _ravel_check_a_and_weights(a, weights):
             RuntimeWarning,
             stacklevel=3,
         )
-        a = a.astype(numpy.uint8)
+        a = dpt.astype(a, numpy.uint8)
 
     if weights is not None:
         # check that `weights` array has supported type
-        dpnp.check_supported_arrays_type(weights)
+        weights = dpnp.get_usm_ndarray(weights)
         usm_type = dpu.get_coerced_usm_type([usm_type, weights.usm_type])
 
         # check that arrays have the same allocation queue
@@ -86,8 +92,9 @@ def _ravel_check_a_and_weights(a, weights):
 
         if weights.shape != a.shape:
             raise ValueError("weights should have the same shape as a.")
-        weights = weights.ravel()
-    a = a.ravel()
+        weights = dpt.reshape(weights, -1)
+
+    a = dpt.reshape(a, -1)
     return a, weights, usm_type
 
 
@@ -113,7 +120,7 @@ def _get_outer_edges(a, range):
         first_edge, last_edge = 0, 1
 
     else:
-        first_edge, last_edge = a.min(), a.max()
+        first_edge, last_edge = dpt.min(a), dpt.max(a)
         if not (dpnp.isfinite(first_edge) and dpnp.isfinite(last_edge)):
             raise ValueError(
                 f"autodetected range of [{first_edge}, {last_edge}] "
@@ -157,9 +164,9 @@ def _get_bin_edges(a, bins, range, usm_type):
                     "a and bins must be allocated on the same SYCL queue"
                 )
 
-            bin_edges = bins
+            bin_edges = dpnp.get_usm_ndarray(bins)
         else:
-            bin_edges = dpnp.asarray(
+            bin_edges = dpt.asarray(
                 bins, sycl_queue=sycl_queue, usm_type=usm_type
             )
 
@@ -183,7 +190,7 @@ def _get_bin_edges(a, bins, range, usm_type):
             )
 
         # bin edges must be computed
-        bin_edges = dpnp.linspace(
+        bin_edges = dpnp_linspace(
             first_edge,
             last_edge,
             n_equal_bins + 1,
@@ -204,8 +211,11 @@ def _search_sorted_inclusive(a, v):
 
     """
 
-    return dpnp.concatenate(
-        (a.searchsorted(v[:-1], "left"), a.searchsorted(v[-1:], "right"))
+    return dpt.concat(
+        (
+            dpt.searchsorted(a, v[:-1], side="left"),
+            dpt.searchsorted(a, v[-1:], side="right"),
+        )
     )
 
 
@@ -297,8 +307,14 @@ def digitize(x, bins, right=False):
         # Use dpnp.searchsorted directly if bins are increasing
         return dpnp.searchsorted(bins, x, side=side)
 
+    usm_x = dpnp.get_usm_ndarray(x)
+    usm_bins = dpnp.get_usm_ndarray(bins)
+
     # Reverse bins and adjust indices if bins are decreasing
-    return bins.size - dpnp.searchsorted(bins[::-1], x, side=side)
+    usm_res = usm_bins.size - dpt.searchsorted(usm_bins[::-1], usm_x, side=side)
+
+    dpnp.synchronize_array_data(usm_res)
+    return dpnp_array._create_from_usm_ndarray(usm_res)
 
 
 def histogram(a, bins=10, range=None, density=None, weights=None):
@@ -412,26 +428,36 @@ def histogram(a, bins=10, range=None, density=None, weights=None):
     else:
         # Compute via cumulative histogram
         if weights is None:
-            sa = dpnp.sort(a)
+            sa = dpt.sort(a)
             cum_n = _search_sorted_inclusive(sa, bin_edges)
         else:
-            zero = dpnp.zeros(
+            zero = dpt.zeros(
                 1, dtype=ntype, sycl_queue=a.sycl_queue, usm_type=usm_type
             )
-            sorting_index = dpnp.argsort(a)
+            sorting_index = dpt.argsort(a)
             sa = a[sorting_index]
             sw = weights[sorting_index]
-            cw = dpnp.concatenate((zero, sw.cumsum(dtype=ntype)))
+            cw = dpt.concat((zero, dpt.cumulative_sum(sw, dtype=ntype)))
             bin_index = _search_sorted_inclusive(sa, bin_edges)
             cum_n = cw[bin_index]
 
         n = dpnp.diff(cum_n)
 
+    # convert bin_edges to dpnp.ndarray
+    bin_edges = dpnp_array._create_from_usm_ndarray(bin_edges)
+
     if density:
         # pylint: disable=possibly-used-before-assignment
-        db = dpnp.diff(bin_edges).astype(dpnp.default_float_type())
-        return n / db / n.sum(), bin_edges
+        db = dpnp.diff(bin_edges)
+        db = dpt.astype(db.get_array(), dpnp.default_float_type())
+
+        usm_n = n.get_array()
+        hist = usm_n / db / dpt.sum(usm_n)
 
+        dpnp.synchronize_array_data(hist)
+        return dpnp_array._create_from_usm_ndarray(hist), bin_edges
+
+    dpnp.synchronize_array_data(n)
     return n, bin_edges
 
 
@@ -517,4 +543,6 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
 
     a, weights, usm_type = _ravel_check_a_and_weights(a, weights)
     bin_edges, _ = _get_bin_edges(a, bins, range, usm_type)
-    return bin_edges
+
+    dpnp.synchronize_array_data(bin_edges)
+    return dpnp_array._create_from_usm_ndarray(bin_edges)

From 12c7a399735e31d340880cc050d0ca4b2a086043 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Mon, 1 Jul 2024 17:27:44 +0200
Subject: [PATCH 09/11] Switched back to use dppy/label/dev for coverage GH
 action

---
 .github/workflows/generate_coverage.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml
index 1fa71fb479dc..5a0480235a7b 100644
--- a/.github/workflows/generate_coverage.yaml
+++ b/.github/workflows/generate_coverage.yaml
@@ -21,7 +21,7 @@ jobs:
 
     env:
       python-ver: '3.10'
-      CHANNELS: '-c dppy/label/coverage -c intel -c conda-forge --override-channels'
+      CHANNELS: '-c dppy/label/dev -c intel -c conda-forge --override-channels'
       # Install the latest oneAPI compiler to work around an issue
       INSTALL_ONE_API: 'yes'
 

From fcc66b09c0135d268a16e49be2b2fb221708d83e Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Mon, 1 Jul 2024 17:28:37 +0200
Subject: [PATCH 10/11] Removed dpnp_container.linspace since unused

---
 dpnp/dpnp_container.py | 33 ---------------------------------
 1 file changed, 33 deletions(-)

diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py
index 5b17f9e87151..8f70e015393c 100644
--- a/dpnp/dpnp_container.py
+++ b/dpnp/dpnp_container.py
@@ -47,7 +47,6 @@
     "empty",
     "eye",
     "full",
-    "linspace",
     "ones",
     "tril",
     "triu",
@@ -245,38 +244,6 @@ def full(
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 
-def linspace(
-    start,
-    stop,
-    /,
-    num,
-    *,
-    dtype=None,
-    device=None,
-    usm_type="device",
-    sycl_queue=None,
-    endpoint=True,
-):
-    """Validate input parameters before passing them into `dpctl.tensor` module"""
-    dpu.validate_usm_type(usm_type, allow_none=False)
-    sycl_queue_normalized = dpnp.get_normalized_queue_device(
-        sycl_queue=sycl_queue, device=device
-    )
-
-    """Creates `dpnp_array` with evenly spaced numbers of specified interval."""
-    array_obj = dpt.linspace(
-        start,
-        stop,
-        num,
-        dtype=dtype,
-        usm_type=usm_type,
-        sycl_queue=sycl_queue_normalized,
-        endpoint=endpoint,
-    )
-    dpnp.synchronize_array_data(array_obj)
-    return dpnp_array(array_obj.shape, buffer=array_obj)
-
-
 def ones(
     shape,
     *,

From 59ac91b04fe63c50d31b1978568d2ee823f08bd0 Mon Sep 17 00:00:00 2001
From: Anton Volkov <antonwolfy@gmail.com>
Date: Mon, 1 Jul 2024 20:18:03 +0200
Subject: [PATCH 11/11] Return dpnp ndarray for linspace, logspace and
 geomspace internal functions

---
 dpnp/dpnp_algo/dpnp_arraycreation.py | 13 ++++++++-----
 dpnp/dpnp_iface_arraycreation.py     |  4 ----
 dpnp/dpnp_iface_histograms.py        |  2 +-
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/dpnp/dpnp_algo/dpnp_arraycreation.py b/dpnp/dpnp_algo/dpnp_arraycreation.py
index aeb77ae34c7e..b493efac9931 100644
--- a/dpnp/dpnp_algo/dpnp_arraycreation.py
+++ b/dpnp/dpnp_algo/dpnp_arraycreation.py
@@ -90,7 +90,7 @@ def dpnp_geomspace(
         dtype=dtype,
         usm_type=_usm_type,
         sycl_queue=sycl_queue_normalized,
-    )
+    ).get_array()
 
     if num > 0:
         res[0] = start
@@ -102,7 +102,8 @@ def dpnp_geomspace(
     if axis != 0:
         res = dpt.moveaxis(res, 0, axis)
 
-    return dpt.astype(res, dtype, copy=False)
+    res = dpt.astype(res, dtype, copy=False)
+    return dpnp_array._create_from_usm_ndarray(res)
 
 
 def dpnp_linspace(
@@ -200,12 +201,14 @@ def dpnp_linspace(
         dpt.floor(usm_res, out=usm_res)
 
     res = dpt.astype(usm_res, dtype, copy=False)
+    res = dpnp_array._create_from_usm_ndarray(res)
+
     if retstep is True:
         if dpnp.isscalar(step):
             step = dpt.asarray(
                 step, usm_type=res.usm_type, sycl_queue=res.sycl_queue
             )
-        return (res, step)
+        return res, dpnp_array._create_from_usm_ndarray(step)
     return res
 
 
@@ -254,12 +257,12 @@ def dpnp_logspace(
         sycl_queue=sycl_queue,
         endpoint=endpoint,
         axis=axis,
-    )
+    ).get_array()
 
     dpt.pow(base, res, out=res)
     if dtype is not None:
         res = dpt.astype(res, dtype, copy=False)
-    return res
+    return dpnp_array._create_from_usm_ndarray(res)
 
 
 class dpnp_nd_grid:
diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py
index 0f74906d6b80..6698f3f782e8 100644
--- a/dpnp/dpnp_iface_arraycreation.py
+++ b/dpnp/dpnp_iface_arraycreation.py
@@ -2201,7 +2201,6 @@ def geomspace(
     )
 
     dpnp.synchronize_array_data(res)
-    res = dpnp_array._create_from_usm_ndarray(res)
     return res
 
 
@@ -2426,10 +2425,8 @@ def linspace(
 
     if isinstance(res, tuple):  # (result, step) is returning
         dpnp.synchronize_array_data(res[0])
-        res = tuple(dpnp_array._create_from_usm_ndarray(x) for x in res)
     else:
         dpnp.synchronize_array_data(res)
-        res = dpnp_array._create_from_usm_ndarray(res)
     return res
 
 
@@ -2660,7 +2657,6 @@ def logspace(
     )
 
     dpnp.synchronize_array_data(res)
-    res = dpnp_array._create_from_usm_ndarray(res)
     return res
 
 
diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py
index bf3164ab9b6c..24c8b6aaf78d 100644
--- a/dpnp/dpnp_iface_histograms.py
+++ b/dpnp/dpnp_iface_histograms.py
@@ -198,7 +198,7 @@ def _get_bin_edges(a, bins, range, usm_type):
             dtype=bin_type,
             sycl_queue=sycl_queue,
             usm_type=usm_type,
-        )
+        ).get_array()
         return bin_edges, (first_edge, last_edge, n_equal_bins)
     return bin_edges, None