diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index df81315ba486..91351d2fd13a 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -40,6 +40,7 @@ env:
       third_party/cupy/linalg_tests/test_norms.py
       third_party/cupy/linalg_tests/test_product.py
       third_party/cupy/linalg_tests/test_solve.py
+      third_party/cupy/linalg_tests/test_einsum.py
       third_party/cupy/logic_tests/test_comparison.py
       third_party/cupy/logic_tests/test_truth.py
       third_party/cupy/manipulation_tests/test_basic.py
diff --git a/doc/known_words.txt b/doc/known_words.txt
index 34e9c2aeb631..758594b6fa3e 100644
--- a/doc/known_words.txt
+++ b/doc/known_words.txt
@@ -5,7 +5,9 @@ broadcastable
 broadcasted
 byteorder
 Cholesky
+combinatorially
 conda
+cubically
 Decompositions
 dimensionality
 docstring
@@ -22,6 +24,7 @@ finfo
 finiteness
 Fortran
 Frobenius
+Hadamard
 Hypergeometric
 iinfo
 Infs
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index ad579af58067..b65efa9b9938 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -580,14 +580,14 @@ def astype(self, dtype, order="K", casting="unsafe", subok=True, copy=True):
             If it is False and no cast happens, then this method returns the array itself.
             Otherwise, a copy is returned.
         casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
-            Controls what kind of data casting may occur. Defaults to 'unsafe' for backwards compatibility.
-
+            Controls what kind of data casting may occur.
+            Defaults to ``'unsafe'`` for backwards compatibility.
             - 'no' means the data types should not be cast at all.
             - 'equiv' means only byte-order changes are allowed.
             - 'safe' means only casts which can preserve values are allowed.
-            - 'same_kind' means only safe casts or casts within a kind, like float64 to float32, are allowed.
+            - 'same_kind' means only safe casts or casts within a kind, like
+              float64 to float32, are allowed.
             - 'unsafe' means any data conversions may be done.
-
         copy : bool, optional
             By default, ``astype`` always returns a newly allocated array. If
             this is set to ``False``, and the `dtype`, `order`, and `subok`
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index 19470a8a14bf..9111fd0b50bb 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -331,7 +331,7 @@ def check_supported_arrays_type(*arrays, scalar_type=False, all_scalars=False):
             f"An array must be any of supported type, but got {type(a)}"
         )
 
-    if len(arrays) > 1 and not (all_scalars or any_is_array):
+    if len(arrays) > 0 and not (all_scalars or any_is_array):
         raise TypeError(
             "At least one input must be of supported array type, "
             "but got all scalars."
diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py
index 32741ff0edec..57dbf0283c57 100644
--- a/dpnp/dpnp_iface_linearalgebra.py
+++ b/dpnp/dpnp_iface_linearalgebra.py
@@ -49,6 +49,7 @@
 )
 from .dpnp_utils.dpnp_utils_linearalgebra import (
     dpnp_dot,
+    dpnp_einsum,
     dpnp_kron,
     dpnp_matmul,
 )
@@ -122,11 +123,11 @@ def dot(a, b, out=None):
     array([[4, 1],
            [2, 2]])
 
-    >>> a = np.arange(3*4*5*6).reshape((3,4,5,6))
-    >>> b = np.arange(3*4*5*6)[::-1].reshape((5,4,6,3))
-    >>> np.dot(a, b)[2,3,2,1,2,2]
+    >>> a = np.arange(3 * 4 * 5 * 6).reshape((3, 4, 5, 6))
+    >>> b = np.arange(3 * 4 * 5 * 6)[::-1].reshape((5, 4, 6, 3))
+    >>> np.dot(a, b)[2, 3, 2, 1, 2, 2]
     array(499128)
-    >>> sum(a[2,3,2,:] * b[1,2,:,2])
+    >>> sum(a[2, 3, 2, :] * b[1, 2, :, 2])
     array(499128)
 
     """
@@ -166,15 +167,61 @@ def dot(a, b, out=None):
     return dpnp.get_result_array(result, out, casting="no")
 
 
-def einsum(*args, **kwargs):
+def einsum(
+    *operands, out=None, dtype=None, order="K", casting="safe", optimize=False
+):
     """
+    einsum(subscripts, *operands, out=None, dtype=None, order="K", \
+        casting="safe", optimize=False)
+
     Evaluates the Einstein summation convention on the operands.
 
     For full documentation refer to :obj:`numpy.einsum`.
 
-    Limitations
-    -----------
-    Function is executed sequentially on CPU.
+    Parameters
+    ----------
+    subscripts : str
+        Specifies the subscripts for summation as comma separated list of
+        subscript labels. An implicit (classical Einstein summation)
+        calculation is performed unless the explicit indicator '->' is
+        included as well as subscript labels of the precise output form.
+    *operands : sequence of {dpnp.ndarrays, usm_ndarray}
+        These are the arrays for the operation.
+    out : {dpnp.ndarrays, usm_ndarray, None}, optional
+        If provided, the calculation is done into this array.
+    dtype : {dtype, None}, optional
+        If provided, forces the calculation to use the data type specified.
+        Default is ``None``.
+    order : {"C", "F", "A", "K"}, optional
+        Controls the memory layout of the output. ``"C"`` means it should be
+        C-contiguous. ``"F"`` means it should be F-contiguous, ``"A"`` means
+        it should be ``"F"`` if the inputs are all ``"F"``, ``"C"`` otherwise.
+        ``"K"`` means it should be as close to the layout as the inputs as
+        is possible, including arbitrarily permuted axes.
+        Default is ``"K"``.
+    casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional
+        Controls what kind of data casting may occur. Setting this to
+        ``"unsafe"`` is not recommended, as it can adversely affect
+        accumulations.
+
+          * ``"no"`` means the data types should not be cast at all.
+          * ``"equiv"`` means only byte-order changes are allowed.
+          * ``"safe"`` means only casts which can preserve values are allowed.
+          * ``"same_kind"`` means only safe casts or casts within a kind,
+            like float64 to float32, are allowed.
+          * ``"unsafe"`` means any data conversions may be done.
+
+        Default is ``"safe"``.
+    optimize : {False, True, "greedy", "optimal"}, optional
+        Controls if intermediate optimization should occur. No optimization
+        will occur if ``False`` and ``True`` will default to the ``"greedy"``
+        algorithm. Also accepts an explicit contraction list from the
+        :obj:`dpnp.einsum_path` function. Default is ``False``.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        The calculation based on the Einstein summation convention.
 
     See Also
     -------
@@ -183,36 +230,323 @@ def einsum(*args, **kwargs):
     :obj:`dpnp.dot` : Returns the dot product of two arrays.
     :obj:`dpnp.inner` : Returns the inner product of two arrays.
     :obj:`dpnp.outer` : Returns the outer product of two arrays.
+    :obj:`dpnp.tensordot` :  Sum products over arbitrary axes.
+    :obj:`dpnp.linalg.multi_dot` : Chained dot product.
+
+    Examples
+    --------
+    >>> import dpnp as np
+    >>> a = np.arange(25).reshape(5,5)
+    >>> b = np.arange(5)
+    >>> c = np.arange(6).reshape(2,3)
+
+    Trace of a matrix:
+
+    >>> np.einsum("ii", a)
+    array(60)
+    >>> np.einsum(a, [0,0])
+    array(60)
+    >>> np.trace(a)
+    array(60)
+
+    Extract the diagonal (requires explicit form):
+
+    >>> np.einsum("ii->i", a)
+    array([ 0,  6, 12, 18, 24])
+    >>> np.einsum(a, [0, 0], [0])
+    array([ 0,  6, 12, 18, 24])
+    >>> np.diag(a)
+    array([ 0,  6, 12, 18, 24])
+
+    Sum over an axis (requires explicit form):
+
+    >>> np.einsum("ij->i", a)
+    array([ 10,  35,  60,  85, 110])
+    >>> np.einsum(a, [0, 1], [0])
+    array([ 10,  35,  60,  85, 110])
+    >>> np.sum(a, axis=1)
+    array([ 10,  35,  60,  85, 110])
+
+    For higher dimensional arrays summing a single axis can be done
+    with ellipsis:
+
+    >>> np.einsum("...j->...", a)
+    array([ 10,  35,  60,  85, 110])
+    >>> np.einsum(a, [Ellipsis,1], [Ellipsis])
+    array([ 10,  35,  60,  85, 110])
+
+    Compute a matrix transpose, or reorder any number of axes:
+
+    >>> np.einsum("ji", c)
+    array([[0, 3],
+           [1, 4],
+           [2, 5]])
+    >>> np.einsum("ij->ji", c)
+    array([[0, 3],
+           [1, 4],
+           [2, 5]])
+    >>> np.einsum(c, [1, 0])
+    array([[0, 3],
+           [1, 4],
+           [2, 5]])
+    >>> np.transpose(c)
+    array([[0, 3],
+           [1, 4],
+           [2, 5]])
+
+    Vector inner products:
+
+    >>> np.einsum("i,i", b, b)
+    array(30)
+    >>> np.einsum(b, [0], b, [0])
+    array(30)
+    >>> np.inner(b,b)
+    array(30)
+
+    Matrix vector multiplication:
+
+    >>> np.einsum("ij,j", a, b)
+    array([ 30,  80, 130, 180, 230])
+    >>> np.einsum(a, [0,1], b, [1])
+    array([ 30,  80, 130, 180, 230])
+    >>> np.dot(a, b)
+    array([ 30,  80, 130, 180, 230])
+    >>> np.einsum("...j,j", a, b)
+    array([ 30,  80, 130, 180, 230])
+
+    Broadcasting and scalar multiplication:
+
+    >>> np.einsum("..., ...", 3, c)
+    array([[ 0,  3,  6],
+           [ 9, 12, 15]])
+    >>> np.einsum(",ij", 3, c)
+    array([[ 0,  3,  6],
+           [ 9, 12, 15]])
+    >>> np.einsum(3, [Ellipsis], c, [Ellipsis])
+    array([[ 0,  3,  6],
+           [ 9, 12, 15]])
+    >>> np.multiply(3, c)
+    array([[ 0,  3,  6],
+           [ 9, 12, 15]])
+
+    Vector outer product:
+
+    >>> np.einsum("i,j", np.arange(2)+1, b)
+    array([[0, 1, 2, 3, 4],
+           [0, 2, 4, 6, 8]])
+    >>> np.einsum(np.arange(2)+1, [0], b, [1])
+    array([[0, 1, 2, 3, 4],
+           [0, 2, 4, 6, 8]])
+    >>> np.outer(np.arange(2)+1, b)
+    array([[0, 1, 2, 3, 4],
+           [0, 2, 4, 6, 8]])
+
+    Tensor contraction:
+
+    >>> a = np.arange(60.).reshape(3, 4, 5)
+    >>> b = np.arange(24.).reshape(4, 3, 2)
+    >>> np.einsum("ijk,jil->kl", a, b)
+    array([[4400., 4730.],
+           [4532., 4874.],
+           [4664., 5018.],
+           [4796., 5162.],
+           [4928., 5306.]])
+    >>> np.einsum(a, [0, 1, 2], b, [1, 0, 3], [2, 3])
+    array([[4400., 4730.],
+           [4532., 4874.],
+           [4664., 5018.],
+           [4796., 5162.],
+           [4928., 5306.]])
+    >>> np.tensordot(a, b, axes=([1, 0],[0, 1]))
+    array([[4400., 4730.],
+           [4532., 4874.],
+           [4664., 5018.],
+           [4796., 5162.],
+           [4928., 5306.]])
+
+    Example of ellipsis use:
+
+    >>> a = np.arange(6).reshape((3, 2))
+    >>> b = np.arange(12).reshape((4, 3))
+    >>> np.einsum("ki,jk->ij", a, b)
+    array([[10, 28, 46, 64],
+           [13, 40, 67, 94]])
+    >>> np.einsum("ki,...k->i...", a, b)
+    array([[10, 28, 46, 64],
+           [13, 40, 67, 94]])
+    >>> np.einsum("k...,jk", a, b)
+    array([[10, 28, 46, 64],
+           [13, 40, 67, 94]])
+
+    Chained array operations. For more complicated contractions, speed ups
+    might be achieved by repeatedly computing a "greedy" path or computing
+    the "optimal" path in advance and repeatedly applying it, using an
+    `einsum_path` insertion. Performance improvements can be particularly
+    significant with larger arrays:
+
+    >>> a = np.ones(64000).reshape(20, 40, 80)
+
+    Basic `einsum`: 119 ms ± 26 ms per loop (evaluated on 12th
+    Gen Intel\u00AE Core\u2122 i7 processor)
+
+    >>> %timeit np.einsum("ijk,ilm,njm,nlk,abc->",a,a,a,a,a)
+
+    Sub-optimal `einsum`: 32.9 ms ± 5.1 ms per loop
+
+    >>> %timeit np.einsum("ijk,ilm,njm,nlk,abc->",a,a,a,a,a, optimize="optimal")
+
+    Greedy `einsum`: 28.6 ms ± 4.8 ms per loop
+
+    >>> %timeit np.einsum("ijk,ilm,njm,nlk,abc->",a,a,a,a,a, optimize="greedy")
+
+    Optimal `einsum`: 26.9 ms ± 6.3 ms per loop
+
+    >>> path = np.einsum_path(
+        "ijk,ilm,njm,nlk,abc->",a,a,a,a,a, optimize="optimal"
+    )[0]
+    >>> %timeit np.einsum("ijk,ilm,njm,nlk,abc->",a,a,a,a,a, optimize=path)
 
     """
 
-    return call_origin(numpy.einsum, *args, **kwargs)
+    if optimize is True:
+        optimize = "greedy"
+
+    return dpnp_einsum(
+        *operands,
+        out=out,
+        dtype=dtype,
+        order=order,
+        casting=casting,
+        optimize=optimize,
+    )
 
 
-def einsum_path(*args, **kwargs):
+def einsum_path(*operands, optimize="greedy", einsum_call=False):
     """
-    einsum_path(subscripts, *operands, optimize='greedy')
+    einsum_path(subscripts, *operands, optimize="greedy")
 
     Evaluates the lowest cost contraction order for an einsum expression
     by considering the creation of intermediate arrays.
 
     For full documentation refer to :obj:`numpy.einsum_path`.
 
-    Limitations
-    -----------
-    Function is executed sequentially on CPU.
+    Parameters
+    ----------
+    subscripts : str
+        Specifies the subscripts for summation.
+    *operands : sequence of arrays
+        These are the arrays for the operation in any form that can be
+        converted to an array. This includes scalars, lists, lists of
+        tuples, tuples, tuples of tuples, tuples of lists, and ndarrays.
+    optimize : {bool, list, tuple, None, "greedy", "optimal"}
+        Choose the type of path. If a tuple is provided, the second argument is
+        assumed to be the maximum intermediate size created. If only a single
+        argument is provided the largest input or output array size is used
+        as a maximum intermediate size.
+
+        * if a list is given that starts with ``einsum_path``, uses this as the
+          contraction path
+        * if ``False`` or ``None`` no optimization is taken
+        * if ``True`` defaults to the "greedy" algorithm
+        * ``"optimal"`` is an algorithm that combinatorially explores all
+          possible ways of contracting the listed tensors and chooses the
+          least costly path. Scales exponentially with the number of terms
+          in the contraction.
+        * ``"greedy"`` is an algorithm that chooses the best pair contraction
+          at each step. Effectively, this algorithm searches the largest inner,
+          Hadamard, and then outer products at each step. Scales cubically with
+          the number of terms in the contraction. Equivalent to the
+          ``"optimal"`` path for most contractions.
+
+        Default is ``"greedy"``.
+
+    Returns
+    -------
+    path : list of tuples
+        A list representation of the einsum path.
+    string_repr : str
+        A printable representation of the einsum path.
+
+    Notes
+    -----
+    The resulting path indicates which terms of the input contraction should be
+    contracted first, the result of this contraction is then appended to the
+    end of the contraction list. This list can then be iterated over until all
+    intermediate contractions are complete.
 
     See Also
     --------
     :obj:`dpnp.einsum` : Evaluates the Einstein summation convention
                          on the operands.
+    :obj:`dpnp.linalg.multi_dot` : Chained dot product.
     :obj:`dpnp.dot` : Returns the dot product of two arrays.
     :obj:`dpnp.inner` : Returns the inner product of two arrays.
     :obj:`dpnp.outer` : Returns the outer product of two arrays.
 
+    Examples
+    --------
+    We can begin with a chain dot example. In this case, it is optimal to
+    contract the ``b`` and ``c`` tensors first as represented by the first
+    element of the path ``(1, 2)``. The resulting tensor is added to the end
+    of the contraction and the remaining contraction ``(0, 1)`` is then
+    completed.
+
+    >>> import dpnp as np
+    >>> np.random.seed(123)
+    >>> a = np.random.rand(2, 2)
+    >>> b = np.random.rand(2, 5)
+    >>> c = np.random.rand(5, 2)
+    >>> path_info = np.einsum_path("ij,jk,kl->il", a, b, c, optimize="greedy")
+
+    >>> print(path_info[0])
+    ['einsum_path', (1, 2), (0, 1)]
+
+    >>> print(path_info[1])
+      Complete contraction:  ij,jk,kl->il # may vary
+             Naive scaling:  4
+         Optimized scaling:  3
+          Naive FLOP count:  1.200e+02
+      Optimized FLOP count:  5.700e+01
+       Theoretical speedup:  2.105
+      Largest intermediate:  4.000e+00 elements
+    -------------------------------------------------------------------------
+    scaling                  current                                remaining
+    -------------------------------------------------------------------------
+       3                   kl,jk->jl                                ij,jl->il
+       3                   jl,ij->il                                   il->il
+
+    A more complex index transformation example.
+
+    >>> I = np.random.rand(10, 10, 10, 10)
+    >>> C = np.random.rand(10, 10)
+    >>> path_info = np.einsum_path(
+            "ea,fb,abcd,gc,hd->efgh", C, C, I, C, C, optimize="greedy"
+        )
+    >>> print(path_info[0])
+    ['einsum_path', (0, 2), (0, 3), (0, 2), (0, 1)]
+    >>> print(path_info[1])
+      Complete contraction:  ea,fb,abcd,gc,hd->efgh # may vary
+             Naive scaling:  8
+         Optimized scaling:  5
+          Naive FLOP count:  5.000e+08
+      Optimized FLOP count:  8.000e+05
+       Theoretical speedup:  624.999
+      Largest intermediate:  1.000e+04 elements
+    --------------------------------------------------------------------------
+    scaling                  current                                remaining
+    --------------------------------------------------------------------------
+       5               abcd,ea->bcde                      fb,gc,hd,bcde->efgh
+       5               bcde,fb->cdef                         gc,hd,cdef->efgh
+       5               cdef,gc->defg                            hd,defg->efgh
+       5               defg,hd->efgh                               efgh->efgh
+
     """
 
-    return call_origin(numpy.einsum_path, *args, **kwargs)
+    return numpy.einsum_path(
+        *operands,
+        optimize=optimize,
+        einsum_call=einsum_call,
+    )
 
 
 def inner(a, b):
@@ -408,7 +742,7 @@ def matmul(
         Type to use in computing the matrix product. By default, the returned
         array will have data type that is determined by considering
         Promotion Type Rule and device capabilities.
-    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
+    casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional
         Controls what kind of data casting may occur. Default: ``"same_kind"``.
     order : {"C", "F", "A", "K", None}, optional
         Memory layout of the newly output array, if parameter `out` is ``None``.
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 3bab6b5363aa..68f5b5d34ae4 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -145,7 +145,7 @@ def _append_to_diff_array(a, axis, combined, values):
 
     """
 
-    dpnp.check_supported_arrays_type(values, scalar_type=True)
+    dpnp.check_supported_arrays_type(values, scalar_type=True, all_scalars=True)
     if dpnp.isscalar(values):
         values = dpnp.asarray(
             values, sycl_queue=a.sycl_queue, usm_type=a.usm_type
diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
index 080c3c52b3c8..92ea44e6c925 100644
--- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
+++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
@@ -23,6 +23,11 @@
 # THE POSSIBILITY OF SUCH DAMAGE.
 # *****************************************************************************
 
+import copy
+import itertools
+import operator
+import warnings
+
 import dpctl
 import dpctl.tensor as dpt
 import dpctl.tensor._tensor_elementwise_impl as tei
@@ -35,7 +40,72 @@
 from dpnp.dpnp_array import dpnp_array
 from dpnp.dpnp_utils import get_usm_allocations
 
-__all__ = ["dpnp_cross", "dpnp_dot", "dpnp_kron", "dpnp_matmul"]
+_einsum_symbols = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+
+
+__all__ = ["dpnp_cross", "dpnp_dot", "dpnp_einsum", "dpnp_kron", "dpnp_matmul"]
+
+
+def _calc_offset(shape, linear_id, strides):
+    """
+    Calculate the offset in a multi-dimensional array given the shape, linear_id, and strides.
+
+    Parameters
+    ----------
+    shape : tuple
+        The shape of the multi-dimensional array.
+    linear_id : int
+        The linear index in the multi-dimensional array.
+    strides : tuple
+        The strides of the multi-dimensional array.
+
+    Returns
+    -------
+    out : int
+        The offset in the multi-dimensional array.
+
+    """
+
+    offset = 0
+    indices = _index_linear_to_tuple(shape, linear_id)
+    for i in range(len(indices)):
+        offset += indices[i] * strides[i]
+    return offset
+
+
+def _chr(label):
+    """
+    Copied from _chr in cupy/core/_einsum.py
+
+    Converts an integer label to a character representation.
+
+    Parameters
+    ----------
+    label : int
+        The integer label to be converted.
+
+    Returns
+    -------
+    out : str
+        A string representation of the label. If the label is negative,
+        it returns a string in the format '...[label]', where label is the
+        negative integer. Otherwise, it returns a single character string
+        representing the label.
+
+    Examples
+    --------
+    >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util
+    >>> np_util._chr(97)
+    'a'
+    >>> np_util._chr(-1)
+    '...[-1]'
+
+    """
+
+    if label < 0:
+        return f"...[{label}]"
+    else:
+        return chr(label)
 
 
 def _create_result_array(x1, x2, out, shape, dtype, usm_type, sycl_queue):
@@ -73,6 +143,61 @@ def _create_result_array(x1, x2, out, shape, dtype, usm_type, sycl_queue):
     )
 
 
+def _compute_size_by_dict(indices, idx_dict):
+    """
+    Copied from _compute_size_by_dict in numpy/core/einsumfunc.py
+
+    Computes the product of the elements in `indices` based on the dictionary
+    `idx_dict`.
+
+    Parameters
+    ----------
+    indices : iterable
+        Indices to base the product on.
+    idx_dict : dictionary
+        Dictionary of index sizes
+
+    Returns
+    -------
+    ret : int
+        The resulting product.
+
+    Examples
+    --------
+    >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util
+    >>> np_util._compute_size_by_dict("abbc", {"a": 2, "b":3, "c":5})
+    90
+
+    """
+    ret = 1
+    for i in indices:
+        ret *= idx_dict[i]
+    return ret
+
+
+def _compute_size(start, shape):
+    """
+    Compute the total size of a multi-dimensional array starting from a given index.
+
+    Parameters
+    ----------
+    start : int
+        The starting index from which to compute the size.
+    shape : tuple
+        The shape of the multi-dimensional array.
+
+    Returns
+    -------
+    out : int
+        The total size of the array.
+
+    """
+    ret = 1
+    for i in range(start, len(shape)):
+        ret *= shape[i]
+    return ret
+
+
 def _copy_array(x, dep_events, host_events, contig_copy=False, dtype=None):
     """
     Creating a copy of input array if needed.
@@ -103,6 +228,267 @@ def _copy_array(x, dep_events, host_events, contig_copy=False, dtype=None):
     return x
 
 
+def _einsum_diagonals(input_subscripts, operands):
+    """
+    Adopted from _einsum_diagonals in cupy/core/_einsum.py
+
+    Compute the diagonal for each operand.
+
+    Parameters
+    ----------
+    input_subscripts : tuple or list of str
+        Strings representing the Einstein summation notation for each operand.
+    operands : tuple or list of dpnp.ndarray or usm_ndarray
+        Input arrays.
+
+    Raises
+    ------
+    ValueError
+        If dimensions in the operands for collapsing indices don't match.
+
+    Notes
+    -----
+    This function mutates `input_subscripts` and `operands`.
+
+    Examples
+    --------
+    >>> import dpnp as np
+    >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util
+    >>> a = np.arange(9).reshape(3, 3)
+    >>> input_subscripts = ["ii"]
+    >>> operands = [a]
+    >>> np_util._einsum_diagonals(input_subscripts, operands)
+    >>> input_subscripts
+    [['i']]
+    >>> operands
+    [array([0, 4, 8])]
+
+    """
+
+    for idx in range(len(input_subscripts)):
+        sub = input_subscripts[idx]
+        arr = operands[idx]
+
+        # repetitive index in the input_subscripts
+        if len(set(sub)) < len(sub):
+            axeses = {}
+            for axis, label in enumerate(sub):
+                axeses.setdefault(label, []).append(axis)
+
+            axeses = list(axeses.items())
+            for label, axes in axeses:
+                dims = {arr.shape[axis] for axis in axes}
+                if len(dims) >= 2:
+                    dim1 = dims.pop()
+                    dim0 = dims.pop()
+                    raise ValueError(
+                        f"dimensions in operand {idx} "
+                        f"for collapsing index '{label}' don't match ({dim0} != {dim1})"
+                    )
+
+            sub, axeses = zip(*axeses)  # axeses is not empty
+            input_subscripts[idx] = list(sub)
+            operands[idx] = _transpose_ex(arr, axeses)
+
+
+def _expand_dims_transpose(arr, mode, mode_out):
+    """
+    Copied from _expand_dims_transpose in cupy/core/_einsum.py
+
+    Return a reshaped and transposed array.
+
+    The input array `arr` having `mode` as its modes is reshaped and
+    transposed so that modes of the output becomes `mode_out`.
+
+    Parameters
+    ----------
+    arr : {dpnp.ndarray, usm_ndarray}
+        Input array.
+    mode : tuple or list
+        The modes of input array.
+    mode_out : tuple or list
+        The modes of output array.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        The reshaped and transposed array.
+
+    Example
+    -------
+    >>> import dpnp
+    >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util
+    >>> a = dpnp.zeros((10, 20))
+    >>> mode_a = ("A", "B")
+    >>> mode_out = ("B", "C", "A")
+    >>> out = np_util._expand_dims_transpose(a, mode_a, mode_out)
+    >>> out.shape
+    (20, 1, 10)
+
+    """
+    mode = list(mode)
+    shape = list(arr.shape)
+    axes = []
+    for i in mode_out:
+        if i not in mode:
+            mode.append(i)
+            shape.append(1)
+        axes.append(mode.index(i))
+    return dpnp.transpose(arr.reshape(shape), axes)
+
+
+def _find_contraction(positions, input_sets, output_set):
+    """
+    Copied from _find_contraction in numpy/core/einsumfunc.py
+
+    Finds the contraction for a given set of input and output sets.
+
+    Parameters
+    ----------
+    positions : iterable
+        Integer positions of terms used in the contraction.
+    input_sets : list of sets
+        List of sets that represent the lhs side of the einsum subscript
+    output_set : set
+        Set that represents the rhs side of the overall einsum subscript
+
+    Returns
+    -------
+    new_result : set
+        The indices of the resulting contraction
+    remaining : list of sets
+        List of sets that have not been contracted, the new set is appended to
+        the end of this list
+    idx_removed : set
+        Indices removed from the entire contraction
+    idx_contraction : set
+        The indices used in the current contraction
+
+    Examples
+    --------
+    # A simple dot product test case
+    >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util
+    >>> pos = (0, 1)
+    >>> isets = [set("ab"), set("bc")]
+    >>> oset = set("ac")
+    >>> np_util._find_contraction(pos, isets, oset)
+    ({'a', 'c'}, [{'a', 'c'}], {'b'}, {'a', 'b', 'c'})
+
+    # A more complex case with additional terms in the contraction
+    >>> pos = (0, 2)
+    >>> isets = [set("abd"), set("ac"), set("bdc")]
+    >>> oset = set("ac")
+    >>> np_util._find_contraction(pos, isets, oset)
+     ({'a', 'c'}, [{'a', 'c'}, {'a', 'c'}], {'b', 'd'}, {'a', 'b', 'c', 'd'})
+
+    """
+
+    idx_contract = set()
+    idx_remain = output_set.copy()
+    remaining = []
+    for ind, value in enumerate(input_sets):
+        if ind in positions:
+            idx_contract |= value
+        else:
+            remaining.append(value)
+            idx_remain |= value
+
+    new_result = idx_remain & idx_contract
+    idx_removed = idx_contract - new_result
+    remaining.append(new_result)
+
+    return (new_result, remaining, idx_removed, idx_contract)
+
+
+def _flatten_transpose(a, axeses):
+    """
+    Copied from _flatten_transpose in cupy/core/_einsum.py
+
+    Transpose and flatten each
+
+    Parameters
+    ----------
+    a : {dpnp.ndarray, usm_ndarray}
+        Input array.
+    axeses : sequence of sequences of ints
+        Axeses
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        `a` with its axes permutated and flatten.
+    shapes : tuple
+        flattened shapes.
+
+    Examples
+    --------
+    >>> import dpnp as np
+    >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util
+    >>> a = np.arange(24).reshape(2, 3, 4)
+    >>> axeses = [(0, 2), (1,)]
+    >>> out, shapes = np_util._flatten_transpose(a, axeses)
+    >>> out.shape
+    (8, 3)
+    >>> shapes
+    [(2, 4), (3,)]
+
+    """
+
+    transpose_axes = []
+    shapes = []
+    for axes in axeses:
+        transpose_axes.extend(axes)
+        shapes.append([a.shape[axis] for axis in axes])
+
+    return (
+        dpnp.transpose(a, transpose_axes).reshape(
+            tuple([int(numpy.prod(shape)) for shape in shapes])
+        ),
+        shapes,
+    )
+
+
+def _flop_count(idx_contraction, inner, num_terms, size_dictionary):
+    """
+    Copied from _flop_count in numpy/core/einsumfunc.py
+
+    Computes the number of FLOPS in the contraction.
+
+    Parameters
+    ----------
+    idx_contraction : iterable
+        The indices involved in the contraction
+    inner : bool
+        Does this contraction require an inner product?
+    num_terms : int
+        The number of terms in a contraction
+    size_dictionary : dict
+        The size of each of the indices in idx_contraction
+
+    Returns
+    -------
+    flop_count : int
+        The total number of FLOPS required for the contraction.
+
+    Examples
+    --------
+    >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util
+    >>> np_util._flop_count("abc", False, 1, {"a": 2, "b":3, "c":5})
+    30
+
+    >>> np_util._flop_count("abc", True, 2, {"a": 2, "b":3, "c":5})
+    60
+
+    """
+
+    overall_size = _compute_size_by_dict(idx_contraction, size_dictionary)
+    op_factor = max(1, num_terms - 1)
+    if inner:
+        op_factor += 1
+
+    return overall_size * op_factor
+
+
 def _gemm_batch_matmul(exec_q, x1, x2, res, x1_is_2D, x2_is_2D, dev_tasks_list):
     # If input array is F-contiguous, we need to change the order to C-contiguous.
     # because mkl::gemm_bacth needs each 2D array to be F-contiguous but
@@ -153,6 +539,199 @@ def _gemm_batch_matmul(exec_q, x1, x2, res, x1_is_2D, x2_is_2D, dev_tasks_list):
     return ht_blas_ev, ht_tasks_list, res
 
 
+def _greedy_path(input_sets, output_set, idx_dict, memory_limit):
+    """
+    Copied from _greedy_path in numpy/core/einsumfunc.py
+
+    Finds the path by contracting the best pair until the input list is
+    exhausted. The best pair is found by minimizing the tuple
+    ``(-prod(indices_removed), cost)``.  What this amounts to is prioritizing
+    matrix multiplication or inner product operations, then Hadamard like
+    operations, and finally outer operations. Outer products are limited by
+    ``memory_limit``. This algorithm scales cubically with respect to the
+    number of elements in the list ``input_sets``.
+
+    Parameters
+    ----------
+    input_sets : list of sets
+        List of sets that represent the lhs side of the einsum subscript
+    output_set : set
+        Set that represents the rhs side of the overall einsum subscript
+    idx_dict : dictionary
+        Dictionary of index sizes
+    memory_limit : int
+        The maximum number of elements in a temporary array
+
+    Returns
+    -------
+    path : list
+        The greedy contraction order within the memory limit constraint.
+
+    Examples
+    --------
+    >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util
+    >>> isets = [set("abd"), set("ac"), set("bdc")]
+    >>> oset = set("")
+    >>> idx_sizes = {"a": 1, "b":2, "c":3, "d":4}
+    >>> _greedy_path(isets, oset, idx_sizes, 5000)
+    [(0, 2), (0, 1)]
+
+    """
+
+    # Handle trivial cases that leaked through
+    if len(input_sets) == 1:
+        return [(0,)]
+    elif len(input_sets) == 2:
+        return [(0, 1)]
+
+    # Build up a naive cost
+    _, _, idx_removed, idx_contract = _find_contraction(
+        range(len(input_sets)), input_sets, output_set
+    )
+    naive_cost = _flop_count(
+        idx_contract, idx_removed, len(input_sets), idx_dict
+    )
+
+    # Initially iterate over all pairs
+    comb_iter = itertools.combinations(range(len(input_sets)), 2)
+    known_contractions = []
+
+    path_cost = 0
+    path = []
+    for _ in range(len(input_sets) - 1):
+        # Iterate over all pairs on first step, only previously found pairs on subsequent steps
+        for positions in comb_iter:
+            # Always initially ignore outer products
+            if input_sets[positions[0]].isdisjoint(input_sets[positions[1]]):
+                continue
+
+            result = _parse_possible_contraction(
+                positions,
+                input_sets,
+                output_set,
+                idx_dict,
+                memory_limit,
+                path_cost,
+                naive_cost,
+            )
+            if result is not None:
+                known_contractions.append(result)
+
+        # If we do not have a inner contraction, rescan pairs including outer products
+        if len(known_contractions) == 0:
+            # Then check the outer products
+            for positions in itertools.combinations(range(len(input_sets)), 2):
+                result = _parse_possible_contraction(
+                    positions,
+                    input_sets,
+                    output_set,
+                    idx_dict,
+                    memory_limit,
+                    path_cost,
+                    naive_cost,
+                )
+                if result is not None:
+                    known_contractions.append(result)
+
+            # If we still did not find any remaining contractions, default back to einsum like behavior
+            if len(known_contractions) == 0:
+                path.append(tuple(range(len(input_sets))))
+                break
+
+        # Sort based on first index
+        best = min(known_contractions, key=lambda x: x[0])
+
+        # Now propagate as many unused contractions as possible to next iteration
+        known_contractions = _update_other_results(known_contractions, best)
+
+        # Next iteration only compute contractions with the new tensor
+        # All other contractions have been accounted for
+        input_sets = best[2]
+        new_tensor_pos = len(input_sets) - 1
+        comb_iter = ((i, new_tensor_pos) for i in range(new_tensor_pos))
+
+        # Update path and total cost
+        path.append(best[1])
+        path_cost += best[0][1]
+
+    return path
+
+
+def _iter_path_pairs(path):
+    """
+    Copied from _iter_path_pairs in cupy/core/_einsum.py
+
+    Decompose path into binary path
+
+    Parameters
+    ----------
+    path : sequence of tuples of ints
+
+    Yields
+    ------
+    tuple of ints
+        pair (idx0, idx1) that represents the operation
+        {pop(idx0); pop(idx1); append();}
+
+    """
+
+    for indices in path:
+        assert all(idx >= 0 for idx in indices)
+        # [3, 1, 4, 9] -> [(9, 4), (-1, 3), (-1, 1)]
+        if len(indices) >= 2:
+            indices = sorted(indices, reverse=True)
+            yield indices[0], indices[1]
+            for idx in indices[2:]:
+                yield -1, idx
+
+
+def _index_linear_to_tuple(shape, linear_id):
+    """
+    Convert a linear index to a tuple of indices in a multi-dimensional array.
+
+    Parameters
+    ----------
+    shape : tuple
+        The shape of the multi-dimensional array.
+    linear_id : int
+        The linear index to convert.
+
+    Returns
+    -------
+    out: tuple
+        A tuple of indices corresponding to the linear index.
+
+    """
+
+    len_shape = len(shape)
+    indices = [0] * len_shape
+    for i in range(len_shape):
+        prod_res = _compute_size(i + 1, shape)
+        indices[i] = linear_id // prod_res
+        linear_id %= prod_res
+
+    return tuple(indices)
+
+
+def _make_transpose_axes(sub, b_dims, c_dims):
+    """Copied from _make_transpose_axes in cupy/core/_einsum.py"""
+    bs = []
+    cs = []
+    ts = []
+    for axis, label in enumerate(sub):
+        if label in b_dims:
+            bs.append((label, axis))
+        elif label in c_dims:
+            cs.append((label, axis))
+        else:
+            ts.append((label, axis))
+    return (
+        _tuple_sorted_by_0(bs),
+        _tuple_sorted_by_0(cs),
+        _tuple_sorted_by_0(ts),
+    )
+
+
 def _op_res_dtype(*arrays, dtype, casting, sycl_queue):
     """
     _op_res_dtype(*arrays, dtype, casting, sycl_queue)
@@ -176,7 +755,7 @@ def _op_res_dtype(*arrays, dtype, casting, sycl_queue):
         Input arrays.
     dtype : dtype
         If not ``None``, data type of the output array.
-    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
+    casting : {"no", "equiv", "safe", "same_kind", "unsafe"}, optional
         Controls what kind of data casting may occur.
     sycl_queue : {SyclQueue}
         A SYCL queue to use for determining default floating point datat type.
@@ -210,6 +789,336 @@ def _op_res_dtype(*arrays, dtype, casting, sycl_queue):
     return op_dtype, res_dtype
 
 
+def _optimal_path(input_sets, output_set, idx_dict, memory_limit):
+    """
+    Copied from _optimal_path in numpy/core/einsumfunc.py
+
+    Computes all possible pair contractions, sieves the results based
+    on ``memory_limit`` and returns the lowest cost path. This algorithm
+    scales factorial with respect to the elements in the list ``input_sets``.
+
+    Parameters
+    ----------
+    input_sets : list of sets
+        List of sets that represent the lhs side of the einsum subscript
+    output_set : set
+        Set that represents the rhs side of the overall einsum subscript
+    idx_dict : dictionary
+        Dictionary of index sizes
+    memory_limit : int
+        The maximum number of elements in a temporary array
+
+    Returns
+    -------
+    path : list
+        The optimal contraction order within the memory limit constraint.
+
+    Examples
+    --------
+    >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util
+    >>> isets = [set("abd"), set("ac"), set("bdc")]
+    >>> oset = set("")
+    >>> idx_sizes = {"a": 1, "b":2, "c":3, "d":4}
+    >>> np_util._optimal_path(isets, oset, idx_sizes, 5000)
+    [(0, 2), (0, 1)]
+
+    """
+
+    full_results = [(0, [], input_sets)]
+    for iteration in range(len(input_sets) - 1):
+        iter_results = []
+
+        # Compute all unique pairs
+        for curr in full_results:
+            cost, positions, remaining = curr
+            for con in itertools.combinations(
+                range(len(input_sets) - iteration), 2
+            ):
+                # Find the contraction
+                cont = _find_contraction(con, remaining, output_set)
+                new_result, new_input_sets, idx_removed, idx_contract = cont
+
+                # Sieve the results based on memory_limit
+                new_size = _compute_size_by_dict(new_result, idx_dict)
+                if new_size > memory_limit:
+                    continue
+
+                # Build (total_cost, positions, indices_remaining)
+                total_cost = cost + _flop_count(
+                    idx_contract, idx_removed, len(con), idx_dict
+                )
+                new_pos = positions + [con]
+                iter_results.append((total_cost, new_pos, new_input_sets))
+
+        # Update combinatorial list, if we did not find anything return best
+        # path + remaining contractions
+        if iter_results:
+            full_results = iter_results
+        else:
+            path = min(full_results, key=lambda x: x[0])[1]
+            path += [tuple(range(len(input_sets) - iteration))]
+            return path
+
+    path = min(full_results, key=lambda x: x[0])[1]
+    return path
+
+
+def _parse_einsum_input(args):
+    """
+    Copied from _parse_einsum_input in cupy/core/_einsum.py
+
+    Parse einsum operands.
+
+    Parameters
+    ----------
+    args : tuple
+        The non-keyword arguments to einsum.
+
+    Returns
+    -------
+    input_strings : str
+        Parsed input strings.
+    output_string : str
+        Parsed output string.
+    operands : list of array_like
+        The operands to use in the contraction.
+
+    Examples
+    --------
+    The operand list is simplified to reduce printing:
+
+    >>> import dpnp as np
+    >>> import dpnp.dpnp_utils.dpnp_utils_linearalgebra as np_util
+    >>> a = np.random.rand(4, 4)
+    >>> b = np.random.rand(4, 4, 4)
+    >>> np_util._parse_einsum_input(("...a,...a->...", a, b))
+    (['@a', '@a'], '@', [a, b])
+
+    >>> np_util._parse_einsum_input((a, [Ellipsis, 0], b, [Ellipsis, 0]))
+    (['@a', '@b'], None, [a, b])
+
+    """
+
+    if len(args) == 0:
+        raise ValueError(
+            "must specify the einstein sum subscripts string and at least one "
+            "operand, or at least one operand and its corresponding "
+            "subscripts list"
+        )
+
+    if isinstance(args[0], str):
+        subscripts = args[0].replace(" ", "")
+        operands = list(args[1:])
+
+        # Ensure all characters are valid
+        for s in subscripts:
+            if s in ".,->":
+                continue
+            if s not in _einsum_symbols:
+                raise ValueError(
+                    f"invalid subscript '{s}' in einstein sum subscripts "
+                    "string, subscripts must be letters"
+                )
+
+        # Parse "..."
+        subscripts = subscripts.replace("...", "@")
+        if "." in subscripts:
+            raise ValueError(
+                "einstein sum subscripts string contains a '.' that is not "
+                "part of an ellipsis ('...')"
+            )
+
+        # Parse "->"
+        if ("-" in subscripts) or (">" in subscripts):
+            # Check for proper "->"
+            invalid = subscripts.count("-") > 1 or subscripts.count(">") > 1
+            subscripts = subscripts.split("->")
+            if invalid or len(subscripts) != 2:
+                raise ValueError(
+                    "einstein sum subscript string does not contain proper "
+                    "'->' output specified"
+                )
+            input_subscripts, output_subscript = subscripts
+
+        else:
+            input_subscripts = subscripts
+            output_subscript = None
+
+        input_subscripts = input_subscripts.split(",")
+        if len(input_subscripts) != len(operands):
+            msg = "more" if len(operands) > len(input_subscripts) else "fewer"
+            raise ValueError(
+                msg + " operands provided to einstein sum function than "
+                "specified in the subscripts string"
+            )
+    else:
+        args = list(args)
+        operands = []
+        input_subscripts = []
+        while len(args) >= 2:
+            operands.append(args.pop(0))
+            input_subscripts.append(_parse_int_subscript(args.pop(0)))
+        if args:
+            output_subscript = _parse_int_subscript(args[0])
+        else:
+            output_subscript = None
+
+    return input_subscripts, output_subscript, operands
+
+
+def _parse_ellipsis_subscript(subscript, idx, ndim=None, ellipsis_len=None):
+    """
+    Copied from _parse_ellipsis_subscript in cupy/core/_einsum.py
+
+    Parse a subscript that may contain ellipsis
+
+    Parameters
+    ----------
+    subscript : str
+        An einsum subscript of an operand or an output. "..."
+        should be replaced by "@".
+    idx : {int, ``None``}
+        For error messages, give int idx for the idx-th operand or ``None``
+        for the output.
+    ndim : int, optional
+        ndim of the operand
+    ellipsis_len : int, optional
+        number of broadcast dimensions of the output.
+
+    Returns
+    -------
+    out : list of ints
+        The parsed subscript
+
+    """
+    subs = subscript.split("@")
+    if len(subs) == 1:
+        (sub,) = subs
+        if ndim is not None and len(sub) != ndim:
+            if len(sub) > ndim:
+                raise ValueError(
+                    f"einstein sum subscripts string {sub} contains too many "
+                    f"subscripts for operand {idx}"
+                )
+            raise ValueError(
+                f"operand {idx} has more dimensions than subscripts string "
+                f"{sub} given in einstein sum, but no '...' ellipsis "
+                "provided to broadcast the extra dimensions."
+            )
+        return [ord(label) for label in sub]
+    elif len(subs) == 2:
+        left_sub, right_sub = subs
+        if ndim is not None:
+            ellipsis_len = ndim - (len(left_sub) + len(right_sub))
+        if ellipsis_len < 0:
+            raise ValueError(
+                f"einstein sum subscripts string {left_sub}...{right_sub} "
+                f"contains too many subscripts for operand {idx}"
+            )
+        ret = []
+        ret.extend(ord(label) for label in left_sub)
+        ret.extend(range(-ellipsis_len, 0))
+        ret.extend(ord(label) for label in right_sub)
+        return ret
+    else:
+        # >= 2 ellipses for an operand
+        raise ValueError(
+            "einstein sum subscripts string contains a '.' that is not "
+            "part of an ellipsis ('...') "
+            + ("in the output" if idx is None else f"for operand {idx}")
+        )
+
+
+def _parse_int_subscript(list_subscript):
+    """Copied from _parse_int_subscript in cupy/core/_einsum.py"""
+
+    str_subscript = ""
+    for s in list_subscript:
+        if s is Ellipsis:
+            str_subscript += "@"
+        else:
+            try:
+                s = operator.index(s)
+            except TypeError as e:
+                raise TypeError(
+                    "For this input type lists must contain "
+                    "either int or Ellipsis"
+                ) from e
+            str_subscript += _einsum_symbols[s]
+    return str_subscript
+
+
+def _parse_possible_contraction(
+    positions,
+    input_sets,
+    output_set,
+    idx_dict,
+    memory_limit,
+    path_cost,
+    naive_cost,
+):
+    """
+    Copied from _parse_possible_contraction in numpy/core/einsumfunc.py
+
+    Compute the cost (removed size + flops) and resultant indices for
+    performing the contraction specified by ``positions``.
+
+    Parameters
+    ----------
+    positions : tuple of int
+        The locations of the proposed tensors to contract.
+    input_sets : list of sets
+        The indices found on each tensors.
+    output_set : set
+        The output indices of the expression.
+    idx_dict : dict
+        Mapping of each index to its size.
+    memory_limit : int
+        The total allowed size for an intermediary tensor.
+    path_cost : int
+        The contraction cost so far.
+    naive_cost : int
+        The cost of the unoptimized expression.
+
+    Returns
+    -------
+    cost : (int, int)
+        A tuple containing the size of any indices removed, and the flop cost.
+    positions : tuple of int
+        The locations of the proposed tensors to contract.
+    new_input_sets : list of sets
+        The resulting new list of indices if this proposed contraction is performed.
+
+    """
+
+    # Find the contraction
+    contract = _find_contraction(positions, input_sets, output_set)
+    idx_result, new_input_sets, idx_removed, idx_contract = contract
+
+    # Sieve the results based on memory_limit
+    new_size = _compute_size_by_dict(idx_result, idx_dict)
+    if new_size > memory_limit:
+        return None
+
+    # Build sort tuple
+    old_sizes = (
+        _compute_size_by_dict(input_sets[p], idx_dict) for p in positions
+    )
+    removed_size = sum(old_sizes) - new_size
+
+    # NB: removed_size used to be just the size of any removed indices i.e.:
+    #     helpers.compute_size_by_dict(idx_removed, idx_dict)
+    cost = _flop_count(idx_contract, idx_removed, len(positions), idx_dict)
+    sort = (-removed_size, cost)
+
+    # Sieve based on total cost as well
+    if (path_cost + cost) >= naive_cost:
+        return None
+
+    # Add contraction to possible choices
+    return [sort, positions, new_input_sets]
+
+
 def _shape_error(a, b, core_dim, err_msg):
     if err_msg == 0:
         raise ValueError(
@@ -235,6 +1144,50 @@ def _shape_error(a, b, core_dim, err_msg):
         )
 
 
+def _reduced_binary_einsum(arr0, sub0, arr1, sub1, sub_others):
+    """Copied from _reduced_binary_einsum in cupy/core/_einsum.py"""
+
+    set0 = set(sub0)
+    set1 = set(sub1)
+    assert len(set0) == len(sub0), "operand 0 should be reduced: diagonal"
+    assert len(set1) == len(sub1), "operand 1 should be reduced: diagonal"
+
+    if len(sub0) == 0 or len(sub1) == 0:
+        return arr0 * arr1, sub0 + sub1
+
+    set_others = set(sub_others)
+    shared = set0 & set1
+    batch_dims = shared & set_others
+    contract_dims = shared - batch_dims
+
+    bs0, cs0, ts0 = _make_transpose_axes(sub0, batch_dims, contract_dims)
+    bs1, cs1, ts1 = _make_transpose_axes(sub1, batch_dims, contract_dims)
+
+    sub_b = [sub0[axis] for axis in bs0]
+    assert sub_b == [sub1[axis] for axis in bs1]
+    sub_l = [sub0[axis] for axis in ts0]
+    sub_r = [sub1[axis] for axis in ts1]
+
+    sub_out = sub_b + sub_l + sub_r
+    assert set(sub_out) <= set_others, "operands should be reduced: unary sum"
+
+    if len(contract_dims) == 0:
+        # Use element-wise multiply when no contraction is needed
+        if len(sub_out) == len(sub_others):
+            # to assure final output of einsum is C-contiguous
+            sub_out = sub_others
+        arr0 = _expand_dims_transpose(arr0, sub0, sub_out)
+        arr1 = _expand_dims_transpose(arr1, sub1, sub_out)
+        return arr0 * arr1, sub_out
+
+    tmp0, shapes0 = _flatten_transpose(arr0, [bs0, ts0, cs0])
+    tmp1, shapes1 = _flatten_transpose(arr1, [bs1, cs1, ts1])
+    shapes_out = shapes0[0] + shapes0[1] + shapes1[2]
+    assert shapes0[0] == shapes1[0]
+    arr_out = dpnp.matmul(tmp0, tmp1).reshape(shapes_out)
+    return arr_out, sub_out
+
+
 def _standardize_strides(strides, inherently_2D, shape, ndim):
     """
     Standardizing the strides.
@@ -266,6 +1219,85 @@ def _standardize_strides(strides, inherently_2D, shape, ndim):
     return stndrd_strides
 
 
+def _tuple_sorted_by_0(zs):
+    """Copied from _tuple_sorted_by_0 in cupy/core/_einsum.py"""
+    return tuple(i for _, i in sorted(zs))
+
+
+def _transpose_ex(a, axeses):
+    """
+    Copied from _transpose_ex in cupy/core/_einsum.py
+
+    Transpose and diagonal
+
+    Parameters
+    ----------
+    a : {dpnp.ndarray, usm_ndarray}
+        Input array.
+    axeses : sequence of sequences of ints
+        Axeses
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        `a` with its axes permutated. A writeable view is returned
+        whenever possible.
+    """
+
+    shape = []
+    strides = []
+    for axes in axeses:
+        shape.append(a.shape[axes[0]] if axes else 1)
+        stride = sum(a.strides[axis] for axis in axes)
+        strides.append(stride)
+
+    # TODO: replace with a.view() when it is implemented in dpnp
+    a = _view_work_around(a, shape, strides)
+    return a
+
+
+def _update_other_results(results, best):
+    """
+    Copied from _update_other_results in numpy/core/einsumfunc.py
+
+    Update the positions and provisional input_sets of ``results`` based on
+    performing the contraction result ``best``. Remove any involving the tensors
+    contracted.
+
+    Parameters
+    ----------
+    results : list
+        List of contraction results produced by ``_parse_possible_contraction``.
+    best : list
+        The best contraction of ``results`` i.e. the one that will be performed.
+
+    Returns
+    -------
+    mod_results : list
+        The list of modified results, updated with outcome of ``best`` contraction.
+    """
+
+    best_con = best[1]
+    bx, by = best_con
+    mod_results = []
+
+    for cost, (x, y), con_sets in results:
+        # Ignore results involving tensors just contracted
+        if x in best_con or y in best_con:
+            continue
+
+        # Update the input_sets
+        del con_sets[by - int(by > x) - int(by > y)]
+        del con_sets[bx - int(bx > x) - int(bx > y)]
+        con_sets.insert(-1, best[2][-1])
+
+        # Update the position indices
+        mod_con = x - int(x > bx) - int(x > by), y - int(y > bx) - int(y > by)
+        mod_results.append((cost, mod_con, con_sets))
+
+    return mod_results
+
+
 def _validate_axes(x1, x2, axes):
     """Check axes is valid for matmul function."""
 
@@ -321,6 +1353,39 @@ def _validate_internal(axes, i, ndim):
     return axes
 
 
+def _view_work_around(a, shape, strides):
+    """
+    Create a copy of the input array with the specified shape and strides.
+
+    Parameters
+    ----------
+    a : {dpnp.ndarray, usm_ndarray}
+        The input array.
+    shape : tuple
+        The desired shape of the output array.
+    strides : tuple
+        The desired strides of the output array.
+
+    Returns
+    -------
+    out : dpnp.ndarray
+        A copy of the input array with the specified shape and strides.
+
+    """
+
+    n_size = numpy.prod(shape)
+    b = dpnp.empty(
+        n_size, dtype=a.dtype, usm_type=a.usm_type, sycl_queue=a.sycl_queue
+    )
+    for linear_id in range(n_size):
+        offset = _calc_offset(shape, linear_id, strides)
+        indices = _index_linear_to_tuple(a.shape, offset)
+        b[linear_id] = a[indices]
+    b = b.reshape(tuple(shape))
+
+    return b
+
+
 def dpnp_cross(a, b, cp, exec_q):
     """Return the cross product of two (arrays of) vectors."""
 
@@ -476,6 +1541,217 @@ def dpnp_cross(a, b, cp, exec_q):
     return cp
 
 
+def dpnp_einsum(
+    *operands, out=None, dtype=None, order="K", casting="safe", optimize=False
+):
+    """Evaluates the Einstein summation convention on the operands."""
+
+    input_subscripts, output_subscript, operands = _parse_einsum_input(operands)
+    assert isinstance(input_subscripts, list)
+    assert isinstance(operands, list)
+
+    dpnp.check_supported_arrays_type(*operands, scalar_type=True)
+    arrays = []
+    for a in operands:
+        if dpnp.is_supported_array_type(a):
+            arrays.append(a)
+
+    res_usm_type, exec_q = get_usm_allocations(arrays)
+    result_dtype = dpnp.result_type(*arrays) if dtype is None else dtype
+    for id, a in enumerate(operands):
+        if dpnp.isscalar(a):
+            operands[id] = dpnp.array(
+                a, dtype=result_dtype, usm_type=res_usm_type, sycl_queue=exec_q
+            )
+
+    input_subscripts = [
+        _parse_ellipsis_subscript(sub, idx, ndim=arr.ndim)
+        for idx, (sub, arr) in enumerate(zip(input_subscripts, operands))
+    ]
+
+    # Get length of each unique dimension and ensure all dimensions are correct
+    dimension_dict = {}
+    for idx, sub in enumerate(input_subscripts):
+        sh = operands[idx].shape
+        for axis, label in enumerate(sub):
+            dim = sh[axis]
+            if label in dimension_dict.keys():
+                # For broadcasting cases we always want the largest dim size
+                if dimension_dict[label] == 1:
+                    dimension_dict[label] = dim
+                elif dim not in (1, dimension_dict[label]):
+                    dim_old = dimension_dict[label]
+                    raise ValueError(
+                        f"Size of label '{_chr(label)}' for operand {idx} ({dim}) "
+                        f"does not match previous terms ({dim_old})."
+                    )
+            else:
+                dimension_dict[label] = dim
+
+    if output_subscript is None:
+        # Build output subscripts
+        tmp_subscripts = list(itertools.chain.from_iterable(input_subscripts))
+        output_subscript = [
+            label
+            for label in sorted(set(tmp_subscripts))
+            if label < 0 or tmp_subscripts.count(label) == 1
+        ]
+    else:
+        if "@" not in output_subscript and -1 in dimension_dict:
+            raise ValueError(
+                "output has more dimensions than subscripts "
+                "given in einstein sum, but no '...' ellipsis "
+                "provided to broadcast the extra dimensions."
+            )
+        output_subscript = _parse_ellipsis_subscript(
+            output_subscript,
+            None,
+            ellipsis_len=sum(label < 0 for label in dimension_dict.keys()),
+        )
+
+        # Make sure output subscripts are in the input
+        tmp_subscripts = set(itertools.chain.from_iterable(input_subscripts))
+        for label in output_subscript:
+            if label not in tmp_subscripts:
+                raise ValueError(
+                    "einstein sum subscripts string included output subscript "
+                    f"'{_chr(label)}' which never appeared in an input."
+                )
+        if len(output_subscript) != len(set(output_subscript)):
+            for label in output_subscript:
+                if output_subscript.count(label) >= 2:
+                    raise ValueError(
+                        "einstein sum subscripts string includes output "
+                        f"subscript '{_chr(label)}' multiple times."
+                    )
+
+    _einsum_diagonals(input_subscripts, operands)
+
+    # no more raises
+    if len(operands) >= 2:
+        if any(arr.size == 0 for arr in operands):
+            return dpnp.zeros(
+                tuple(dimension_dict[label] for label in output_subscript),
+                dtype=result_dtype,
+                usm_type=res_usm_type,
+                sycl_queue=exec_q,
+            )
+
+        # Don't squeeze if unary, because this affects later (in trivial sum)
+        # whether the return is a writeable view.
+        for idx in range(len(operands)):
+            arr = operands[idx]
+            if 1 in arr.shape:
+                squeeze_indices = []
+                sub = []
+                for axis, label in enumerate(input_subscripts[idx]):
+                    if arr.shape[axis] == 1:
+                        squeeze_indices.append(axis)
+                    else:
+                        sub.append(label)
+                input_subscripts[idx] = sub
+                operands[idx] = dpnp.squeeze(arr, axis=tuple(squeeze_indices))
+                assert operands[idx].ndim == len(input_subscripts[idx])
+            del arr
+
+    # unary einsum without summation should return a (writeable) view
+    returns_view = len(operands) == 1
+
+    # unary sum
+    for idx, sub in enumerate(input_subscripts):
+        other_subscripts = copy.copy(input_subscripts)
+        other_subscripts[idx] = output_subscript
+        other_subscripts = set(itertools.chain.from_iterable(other_subscripts))
+        sum_axes = tuple(
+            axis
+            for axis, label in enumerate(sub)
+            if label not in other_subscripts
+        )
+        if sum_axes:
+            returns_view = False
+            input_subscripts[idx] = [
+                label for axis, label in enumerate(sub) if axis not in sum_axes
+            ]
+
+            operands[idx] = operands[idx].sum(axis=sum_axes, dtype=result_dtype)
+
+    if returns_view:
+        # TODO: replace with a.view() when it is implemented in dpnp
+        operands = [a for a in operands]
+    else:
+        operands = [
+            dpnp.astype(
+                a, result_dtype, copy=False, casting=casting, order=order
+            )
+            for a in operands
+        ]
+
+    # no more casts
+    optimize_algorithms = {
+        "greedy": _greedy_path,
+        "optimal": _optimal_path,
+    }
+    if optimize is False:
+        path = [tuple(range(len(operands)))]
+    elif len(optimize) and (optimize[0] == "einsum_path"):
+        path = optimize[1:]
+    else:
+        try:
+            if len(optimize) == 2 and isinstance(optimize[1], (int, float)):
+                algo = optimize_algorithms[optimize[0]]
+                memory_limit = int(optimize[1])
+            else:
+                algo = optimize_algorithms[optimize]
+                memory_limit = 2**31
+        except (TypeError, KeyError):  # unhashable type or not found
+            raise TypeError(
+                f"Did not understand the path (optimize): {str(optimize)}"
+            )
+        input_sets = [set(sub) for sub in input_subscripts]
+        output_set = set(output_subscript)
+        path = algo(input_sets, output_set, dimension_dict, memory_limit)
+        if any(len(indices) > 2 for indices in path):
+            warnings.warn(
+                "memory efficient einsum is not supported yet",
+                RuntimeWarning,
+                stacklevel=2,
+            )
+
+    for idx0, idx1 in _iter_path_pairs(path):
+        # "reduced" binary einsum
+        arr0 = operands.pop(idx0)
+        sub0 = input_subscripts.pop(idx0)
+        arr1 = operands.pop(idx1)
+        sub1 = input_subscripts.pop(idx1)
+        sub_others = list(
+            itertools.chain(
+                output_subscript,
+                itertools.chain.from_iterable(input_subscripts),
+            )
+        )
+        arr_out, sub_out = _reduced_binary_einsum(
+            arr0, sub0, arr1, sub1, sub_others
+        )
+        operands.append(arr_out)
+        input_subscripts.append(sub_out)
+        del arr0, arr1
+
+    # unary einsum at last
+    (arr0,) = operands
+    (sub0,) = input_subscripts
+
+    transpose_axes = []
+    for label in output_subscript:
+        if label in sub0:
+            transpose_axes.append(sub0.index(label))
+
+    arr_out = arr0.transpose(transpose_axes).reshape(
+        [dimension_dict[label] for label in output_subscript]
+    )
+    assert returns_view or arr_out.dtype == result_dtype
+    return dpnp.get_result_array(arr_out, out)
+
+
 def dpnp_kron(a, b, a_ndim, b_ndim):
     """Returns the kronecker product of two arrays."""
 
@@ -672,8 +1948,8 @@ def dpnp_matmul(
 
     # find the result shape
     if x1_is_2D and x2_is_2D:
-        x1 = x1.reshape(x1.shape[-2], x1.shape[-1])
-        x2 = x2.reshape(x2.shape[-2], x2.shape[-1])
+        x1 = dpnp.reshape(x1, (x1.shape[-2], x1.shape[-1]))
+        x2 = dpnp.reshape(x2, (x2.shape[-2], x2.shape[-1]))
         res_shape = (x1.shape[-2], x2.shape[-1])
     else:
         # makes the dimension of input the same by adding new axis
diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py
index b10df5863f72..ed852054e284 100644
--- a/dpnp/linalg/dpnp_iface_linalg.py
+++ b/dpnp/linalg/dpnp_iface_linalg.py
@@ -781,7 +781,9 @@ def matrix_rank(A, tol=None, hermitian=False):
 
     dpnp.check_supported_arrays_type(A)
     if tol is not None:
-        dpnp.check_supported_arrays_type(tol, scalar_type=True)
+        dpnp.check_supported_arrays_type(
+            tol, scalar_type=True, all_scalars=True
+        )
 
     return dpnp_matrix_rank(A, tol=tol, hermitian=hermitian)
 
@@ -895,7 +897,7 @@ def pinv(a, rcond=1e-15, hermitian=False):
     """
 
     dpnp.check_supported_arrays_type(a)
-    dpnp.check_supported_arrays_type(rcond, scalar_type=True)
+    dpnp.check_supported_arrays_type(rcond, scalar_type=True, all_scalars=True)
     assert_stacked_2d(a)
 
     return dpnp_pinv(a, rcond=rcond, hermitian=hermitian)
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index 9ef4091f7482..d6fd82b9e3e3 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -263,29 +263,6 @@ tests/third_party/cupy/indexing_tests/test_iterate.py::TestFlatiterSubscriptInde
 tests/third_party/cupy/indexing_tests/test_iterate.py::TestFlatiterSubscriptIndexError_param_3_{index=[0], shape=(2, 3, 4)}::test_getitem
 tests/third_party/cupy/indexing_tests/test_iterate.py::TestFlatiterSubscriptIndexError_param_3_{index=[0], shape=(2, 3, 4)}::test_setitem
 
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumBinaryOperationWithScalar::test_scalar_1
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumBinaryOperationWithScalar::test_scalar_2
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_10_{opt='optimal', subscript='chd,bde,agbc,hiad,bdi,cgh,agdb'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_11_{opt='optimal', subscript='eb,cb,fb->cef'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_12_{opt='optimal', subscript='dd,fb,be,cdb->cef'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_13_{opt='optimal', subscript='bca,cdb,dbf,afc->'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_14_{opt='optimal', subscript='dcc,fce,ea,dbf->ab'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_15_{opt='optimal', subscript='a,ac,ab,ad,cd,bd,bc->'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_1_{opt=('greedy', 0), subscript='acdf,jbje,gihb,hfac'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_2_{opt='greedy', subscript='acdf,jbje,gihb,hfac,gfac,gifabc,hfac'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_3_{opt='greedy', subscript='chd,bde,agbc,hiad,bdi,cgh,agdb'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_4_{opt='greedy', subscript='eb,cb,fb->cef'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_5_{opt='greedy', subscript='dd,fb,be,cdb->cef'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_6_{opt='greedy', subscript='bca,cdb,dbf,afc->'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_7_{opt='greedy', subscript='dcc,fce,ea,dbf->ab'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_8_{opt='greedy', subscript='a,ac,ab,ad,cd,bd,bc->'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_9_{opt='optimal', subscript='acdf,jbje,gihb,hfac,gfac,gifabc,hfac'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_float
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_int
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_dim_mismatch3
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_invalid_sub1
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_too_many_dims3
-
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_0_{shapes=[(), ()]}::test_broadcast
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_10_{shapes=[(0, 1, 1, 0, 3), (5, 2, 0, 1, 0, 0, 3), (2, 1, 0, 0, 0, 3)]}::test_broadcast
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_1_{shapes=[(0,), (0,)]}::test_broadcast
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 4688c1787fc1..73087bde65e0 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -87,9 +87,6 @@ tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes
 tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_3_{order='C', shape=(2, 3)}::test_item
 tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_4_{order='F', shape=(2, 3)}::test_item
 
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_dim_mismatch3
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_too_many_dims3
-
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_0_{d=2, shape=(4, 3, 2)}::test_normal
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_1_{d=2, shape=(3, 2)}::test_normal
 tests/third_party/cupy/random_tests/test_distributions.py::TestDistributionsMultivariateNormal_param_2_{d=4, shape=(4, 3, 2)}::test_normal
@@ -333,27 +330,6 @@ tests/third_party/cupy/indexing_tests/test_iterate.py::TestFlatiterSubscriptInde
 tests/third_party/cupy/indexing_tests/test_iterate.py::TestFlatiterSubscriptIndexError_param_3_{index=[0], shape=(2, 3, 4)}::test_getitem
 tests/third_party/cupy/indexing_tests/test_iterate.py::TestFlatiterSubscriptIndexError_param_3_{index=[0], shape=(2, 3, 4)}::test_setitem
 
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumBinaryOperationWithScalar::test_scalar_1
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumBinaryOperationWithScalar::test_scalar_2
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_10_{opt='optimal', subscript='chd,bde,agbc,hiad,bdi,cgh,agdb'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_11_{opt='optimal', subscript='eb,cb,fb->cef'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_12_{opt='optimal', subscript='dd,fb,be,cdb->cef'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_13_{opt='optimal', subscript='bca,cdb,dbf,afc->'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_14_{opt='optimal', subscript='dcc,fce,ea,dbf->ab'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_15_{opt='optimal', subscript='a,ac,ab,ad,cd,bd,bc->'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_1_{opt=('greedy', 0), subscript='acdf,jbje,gihb,hfac'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_2_{opt='greedy', subscript='acdf,jbje,gihb,hfac,gfac,gifabc,hfac'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_3_{opt='greedy', subscript='chd,bde,agbc,hiad,bdi,cgh,agdb'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_4_{opt='greedy', subscript='eb,cb,fb->cef'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_5_{opt='greedy', subscript='dd,fb,be,cdb->cef'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_6_{opt='greedy', subscript='bca,cdb,dbf,afc->'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_7_{opt='greedy', subscript='dcc,fce,ea,dbf->ab'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_8_{opt='greedy', subscript='a,ac,ab,ad,cd,bd,bc->'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_9_{opt='optimal', subscript='acdf,jbje,gihb,hfac,gfac,gifabc,hfac'}::test_einsum
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_float
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_int
-tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_invalid_sub1
-
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_0_{shapes=[(), ()]}::test_broadcast
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_1_{shapes=[(0,), (0,)]}::test_broadcast
 tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_2_{shapes=[(1,), (1,)]}::test_broadcast
diff --git a/tests/test_linalg.py b/tests/test_linalg.py
index 7e0b99443d61..a45b2826b3ac 100644
--- a/tests/test_linalg.py
+++ b/tests/test_linalg.py
@@ -1,3 +1,5 @@
+from ast import Raise
+
 import dpctl
 import dpctl.tensor as dpt
 import numpy
@@ -584,6 +586,54 @@ def test_eigenvalue_errors(self, func):
             assert_raises(ValueError, dpnp_func, a_dp, UPLO="N")
 
 
+class TestEinsum:
+    def test_einsum_trivial_cases(self):
+        a = inp.arange(25).reshape(5, 5)
+        b = inp.arange(5)
+        a_np = a.asnumpy()
+        b_np = b.asnumpy()
+
+        # one input, no optimization is needed
+        result = inp.einsum("i", b, optimize="greedy")
+        expected = numpy.einsum("i", b_np, optimize="greedy")
+        assert_dtype_allclose(result, expected)
+
+        # two inputs, no optimization is needed
+        result = inp.einsum("ij,jk", a, a, optimize="greedy")
+        expected = numpy.einsum("ij,jk", a_np, a_np, optimize="greedy")
+        assert_dtype_allclose(result, expected)
+
+        # no optimization in optimal mode
+        result = inp.einsum("ij,jk", a, a, optimize=["optimal", 1])
+        expected = numpy.einsum("ij,jk", a_np, a_np, optimize=["optimal", 1])
+        assert_dtype_allclose(result, expected)
+
+        # naive cost equal or smaller than optimized cost
+        result = inp.einsum("i,i,i", b, b, b, optimize="greedy")
+        expected = numpy.einsum("i,i,i", b_np, b_np, b_np, optimize="greedy")
+        assert_dtype_allclose(result, expected)
+
+    def test_einsum_error(self):
+        a = inp.ones((5, 5))
+        # unknown keyword argument
+        with pytest.raises(TypeError):
+            inp.einsum("ii->i", a, copy=False)
+
+        # unknown value for optimize keyword
+        with pytest.raises(TypeError):
+            inp.einsum("ii->i", a, optimize="average")
+
+        a = inp.ones((5, 4))
+        # different size for same label 5 != 4
+        with pytest.raises(ValueError):
+            inp.einsum("ii", a)
+
+        a = inp.ones((5, 5))
+        # repeated scripts in output
+        with pytest.raises(ValueError):
+            inp.einsum("ij,jk->ii", a, a)
+
+
 class TestInv:
     @pytest.mark.parametrize(
         "array",
diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py
index 60ea8c0cd031..017f364b6681 100644
--- a/tests/test_mathematical.py
+++ b/tests/test_mathematical.py
@@ -2554,14 +2554,14 @@ def setup_method(self):
     def test_matmul(self, order_pair, shape_pair):
         order1, order2 = order_pair
         shape1, shape2 = shape_pair
-        a1 = numpy.arange(numpy.prod(shape1)).reshape(shape1)
-        a2 = numpy.arange(numpy.prod(shape2)).reshape(shape2)
+        dtype = dpnp.default_float_type()
+        a1 = numpy.arange(numpy.prod(shape1), dtype=dtype).reshape(shape1)
+        a2 = numpy.arange(numpy.prod(shape2), dtype=dtype).reshape(shape2)
         a1 = numpy.array(a1, order=order1)
         a2 = numpy.array(a2, order=order2)
 
         b1 = dpnp.asarray(a1)
         b2 = dpnp.asarray(a2)
-
         result = dpnp.matmul(b1, b2)
         expected = numpy.matmul(a1, a2)
         assert_dtype_allclose(result, expected)
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index dfa2296dbac1..d79ae8b3337d 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -1043,6 +1043,29 @@ def test_multi_dot(device):
         assert_sycl_queue_equal(result.sycl_queue, exec_q)
 
 
+@pytest.mark.parametrize(
+    "device",
+    valid_devices,
+    ids=[device.filter_string for device in valid_devices],
+)
+def test_einsum(device):
+    numpy_array_list = []
+    dpnp_array_list = []
+    for _ in range(3):  # creat arrays one by one
+        a = numpy.random.rand(10, 10)
+        b = dpnp.array(a, device=device)
+
+        numpy_array_list.append(a)
+        dpnp_array_list.append(b)
+
+    result = dpnp.einsum("ij,jk,kl->il", *dpnp_array_list)
+    expected = numpy.einsum("ij,jk,kl->il", *numpy_array_list)
+    assert_dtype_allclose(result, expected)
+
+    _, exec_q = get_usm_allocations(dpnp_array_list)
+    assert_sycl_queue_equal(result.sycl_queue, exec_q)
+
+
 @pytest.mark.parametrize(
     "device",
     valid_devices,
diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py
index 7923e60455b1..75e70a978bc1 100644
--- a/tests/test_usm_type.py
+++ b/tests/test_usm_type.py
@@ -717,6 +717,26 @@ def test_multi_dot(usm_type):
         assert result.usm_type == usm_type
 
 
+@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types)
+def test_einsum(usm_type):
+    numpy_array_list = []
+    dpnp_array_list = []
+    for _ in range(3):  # creat arrays one by one
+        a = numpy.random.rand(10, 10)
+        b = dp.array(a, usm_type=usm_type)
+
+        numpy_array_list.append(a)
+        dpnp_array_list.append(b)
+
+    result = dp.einsum("ij,jk,kl->il", *dpnp_array_list)
+    expected = numpy.einsum("ij,jk,kl->il", *numpy_array_list)
+    assert_dtype_allclose(result, expected)
+
+    input_usm_type, _ = get_usm_allocations(dpnp_array_list)
+    assert input_usm_type == usm_type
+    assert result.usm_type == usm_type
+
+
 @pytest.mark.parametrize("func", ["take", "take_along_axis"])
 @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
 @pytest.mark.parametrize(
diff --git a/tests/third_party/cupy/linalg_tests/test_einsum.py b/tests/third_party/cupy/linalg_tests/test_einsum.py
index 613102c49c3f..b382ec28f174 100644
--- a/tests/third_party/cupy/linalg_tests/test_einsum.py
+++ b/tests/third_party/cupy/linalg_tests/test_einsum.py
@@ -1,4 +1,3 @@
-import unittest
 import warnings
 
 import numpy
@@ -8,14 +7,60 @@
 from tests.helper import has_support_aspect64
 from tests.third_party.cupy import testing
 
+rng = numpy.random.default_rng(seed=0)
+
 
 def _dec_shape(shape, dec):
     # Test smaller shape
     return tuple(1 if s == 1 else max(0, s - dec) for s in shape)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
-class TestEinSumError(unittest.TestCase):
+def _rand1_shape(shape, prob):
+    # Test broadcast
+    # If diagonals are "broadcasted" we can simply:
+    # return tuple(1 if rng.uniform() < prob else s for s in shape)
+    table = {}
+    new_shape = []
+    for s in shape:
+        if s not in table:
+            table[s] = 1 if rng.uniform() < prob else s
+        new_shape.append(table[s])
+    return tuple(new_shape)
+
+
+def augment_einsum_testcases(*params):
+    """Modify shapes in einsum tests
+
+    Shape parameter should be starts with "shape_".
+    The original parameter is stored as "_raw_params".
+
+    Args:
+        params (sequence of dicts)
+
+    Yields:
+        dict: parameter with modified shapes.
+
+    """
+    for dec in range(3):
+        for drop in [False, True]:
+            for param in params:
+                param_new = param.copy()
+                for k in param.keys():
+                    if k.startswith("shape_"):
+                        new_shape = _dec_shape(param[k], dec)
+                        if drop:
+                            prob = rng.uniform()
+                            new_shape = _rand1_shape(new_shape, prob)
+                        param_new[k] = new_shape
+                param_new["_raw_params"] = {
+                    "orig": param,
+                    "dec": dec,
+                    "drop": drop,
+                }
+                yield param_new
+
+
+class TestEinSumError:
     def test_irregular_ellipsis1(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
@@ -76,7 +121,8 @@ def test_too_few_operands1(self):
     def test_too_many_dimension1(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
-                xp.einsum("i", 0)
+                # modified since dpnp does not support scalar
+                xp.einsum("i", xp.array(0))
 
     def test_too_many_dimension2(self):
         for xp in (numpy, cupy):
@@ -108,7 +154,7 @@ def test_invalid_char3(self):
             with pytest.raises(ValueError):
                 xp.einsum("i->&", xp.array([0, 0]))
 
-    # output subscripts must appear in inumpy.t
+    # output subscripts must appear in input
     def test_invalid_output_subscripts1(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
@@ -120,13 +166,13 @@ def test_invalid_output_subscripts2(self):
             with pytest.raises(ValueError):
                 xp.einsum("ij->jij", xp.array([[0, 0], [0, 0]]))
 
-    # output subscripts must not incrudes comma
+    # output subscripts must not includes comma
     def test_invalid_output_subscripts3(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
                 xp.einsum("ij->i,j", xp.array([[0, 0], [0, 0]]))
 
-    # dimensions much match when being collapsed
+    # dimensions must match when being collapsed
     def test_invalid_diagonal1(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
@@ -186,31 +232,28 @@ def test_invalid_arrow4(self):
                 xp.einsum("i-", xp.array([0, 0]))
 
 
-class TestListArgEinSumError(unittest.TestCase):
+class TestListArgEinSumError:
+    @testing.with_requires("numpy>=1.19")
     def test_invalid_sub1(self):
         for xp in (numpy, cupy):
-            with pytest.raises(ValueError):
+            with pytest.raises(TypeError):
                 xp.einsum(xp.arange(2), [None])
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_invalid_sub2(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
                 xp.einsum(xp.arange(2), [0], [1])
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_invalid_sub3(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
                 xp.einsum(xp.arange(2), [Ellipsis, 0, Ellipsis])
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_dim_mismatch1(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
                 xp.einsum(xp.arange(2), [0], xp.arange(3), [0])
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_dim_mismatch2(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
@@ -221,13 +264,12 @@ def test_dim_mismatch3(self):
             with pytest.raises(ValueError):
                 xp.einsum(xp.arange(6).reshape(2, 3), [0, 0])
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_too_many_dims1(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
-                xp.einsum(3, [0])
+                # modified since dpnp does not support scalar
+                xp.einsum(xp.array(3), [0])
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
     def test_too_many_dims2(self):
         for xp in (numpy, cupy):
             with pytest.raises(ValueError):
@@ -239,19 +281,200 @@ def test_too_many_dims3(self):
                 xp.einsum(xp.arange(6).reshape(2, 3), [Ellipsis, 0, 1, 2])
 
 
-class TestEinSumUnaryOperationWithScalar(unittest.TestCase):
+@pytest.mark.parametrize("do_opt", [False, True])
+class TestListArgEinSum:
+    # numpy/numpy#15961: should accept int64 type in subscript list
+    @testing.with_requires("numpy>=1.19")
+    @testing.numpy_cupy_allclose()
+    def test_numpy_15961_array(self, xp, do_opt):
+        n = 3
+        a = testing.shaped_arange((n, n), xp)
+        sub = numpy.array([0, 0])
+        return xp.einsum(a, sub, optimize=do_opt)
+
+    @testing.with_requires("numpy>=1.19")
+    @testing.numpy_cupy_allclose()
+    def test_numpy_15961_list(self, xp, do_opt):
+        n = 3
+        a = testing.shaped_arange((n, n), xp)
+        sub = list(numpy.array([0, 0]))
+        return xp.einsum(a, sub, optimize=do_opt)
+
+
+@testing.parameterize(
+    *augment_einsum_testcases(
+        {"shape_a": (2, 3), "subscripts": "ij"},  # do nothing
+        {"shape_a": (2, 3), "subscripts": "..."},  # do nothing
+        {"shape_a": (2, 3), "subscripts": "ji"},  # transpose
+        {"shape_a": (3, 3), "subscripts": "ii->i"},  # diagonal 2d
+        {"shape_a": (3, 3, 3), "subscripts": "jii->ij"},  # partial diagonal 3d
+        {"shape_a": (3, 3, 3), "subscripts": "iji->ij"},  # partial diagonal 3d
+        {
+            "shape_a": (3, 3, 3),
+            "subscripts": "...ii->...i",
+        },  # partial diagonal 3d
+        {"shape_a": (3, 3, 3), "subscripts": "iii->i"},  # diagonal 3d
+        {"shape_a": (2, 3, 4), "subscripts": "ijk->jik"},  # swap axes
+        {"shape_a": (2, 3, 4), "subscripts": "ijk->kij"},  # swap axes
+        {"shape_a": (2, 3, 4), "subscripts": "ijk->ikj"},  # swap axes
+        {"shape_a": (2, 3, 4), "subscripts": "kji->ikj"},  # swap axes
+        {"shape_a": (2, 3, 4), "subscripts": "j...i->i...j"},  # swap axes
+        {"shape_a": (3,), "subscripts": "i->"},  # sum
+        {"shape_a": (3, 3), "subscripts": "ii"},  # trace
+        {"shape_a": (2, 2, 2, 2), "subscripts": "ijkj->kij"},
+        {"shape_a": (2, 2, 2, 2), "subscripts": "ijij->ij"},
+        {"shape_a": (2, 2, 2, 2), "subscripts": "jiji->ij"},
+        {"shape_a": (2, 2, 2, 2), "subscripts": "ii...->..."},  # trace
+        {"shape_a": (2, 2, 2, 2), "subscripts": "i...i->..."},  # trace
+        {"shape_a": (2, 2, 2, 2), "subscripts": "...ii->..."},  # trace
+        {"shape_a": (2, 2, 2, 2), "subscripts": "j...i->..."},  # sum
+        {"shape_a": (2, 3), "subscripts": "ij->ij..."},  # do nothing
+        {"shape_a": (2, 3), "subscripts": "ij->i...j"},  # do nothing
+        {"shape_a": (2, 3), "subscripts": "ij->...ij"},  # do nothing
+        {"shape_a": (2, 3), "subscripts": "ij...->ij"},  # do nothing
+        {"shape_a": (2, 3), "subscripts": "i...j->ij"},  # do nothing
+        {"shape_a": (), "subscripts": ""},  # do nothing
+        {"shape_a": (), "subscripts": "->"},  # do nothing
+    )
+)
+class TestEinSumUnaryOperation:
+    @testing.for_all_dtypes(no_bool=False)
+    @testing.numpy_cupy_allclose(
+        rtol={numpy.float16: 1e-1, "default": 1e-7}, contiguous_check=False
+    )
+    def test_einsum_unary(self, xp, dtype):
+        a = testing.shaped_arange(self.shape_a, xp, dtype)
+        out = xp.einsum(self.subscripts, a)
+        if xp is not numpy:
+            optimized_out = xp.einsum(self.subscripts, a, optimize=True)
+            testing.assert_allclose(optimized_out, out)
+        return out
+
+    @pytest.mark.skip("view is not supported")
+    @testing.for_all_dtypes(no_bool=False)
+    @testing.numpy_cupy_equal()
+    def test_einsum_unary_views(self, xp, dtype):
+        a = testing.shaped_arange(self.shape_a, xp, dtype)
+        b = xp.einsum(self.subscripts, a)
+
+        return b.ndim == 0 or b.base is a
+
+    @testing.for_all_dtypes_combination(
+        ["dtype_a", "dtype_out"], no_bool=False, no_complex=True
+    )  # avoid ComplexWarning
+    @testing.numpy_cupy_allclose(
+        rtol={numpy.float16: 1e-1, "default": 1e-7}, contiguous_check=False
+    )
+    def test_einsum_unary_dtype(self, xp, dtype_a, dtype_out):
+        if not numpy.can_cast(dtype_a, dtype_out):
+            pytest.skip()
+        a = testing.shaped_arange(self.shape_a, xp, dtype_a)
+        return xp.einsum(self.subscripts, a, dtype=dtype_out)
+
+
+class TestEinSumUnaryOperationWithScalar:
+    @pytest.mark.skip("All operands are scalar.")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_scalar_int(self, xp, dtype):
         return xp.asarray(xp.einsum("->", 2, dtype=dtype))
 
+    @pytest.mark.skip("All operands are scalar.")
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose()
     def test_scalar_float(self, xp, dtype):
         return xp.asarray(xp.einsum("", 2.0, dtype=dtype))
 
 
-class TestEinSumBinaryOperationWithScalar(unittest.TestCase):
+@testing.parameterize(
+    *augment_einsum_testcases(
+        # dot vecvec
+        {"shape_a": (3,), "shape_b": (3,), "subscripts": "i,i"},
+        # outer
+        {"shape_a": (2,), "shape_b": (3,), "subscripts": "i,j"},
+        # dot matvec
+        {"shape_a": (2, 3), "shape_b": (3,), "subscripts": "ij,j"},
+        {"shape_a": (2, 3), "shape_b": (2,), "subscripts": "ij,i"},
+        # dot matmat
+        {"shape_a": (2, 3), "shape_b": (3, 4), "subscripts": "ij,jk"},
+        # tensordot
+        {
+            "shape_a": (3, 4, 2),
+            "shape_b": (4, 3, 2),
+            "subscripts": "ijk, jil -> kl",
+        },
+        {
+            "shape_a": (3, 4, 2),
+            "shape_b": (4, 2, 3),
+            "subscripts": "i...,...k->ki...",
+        },
+        {
+            "shape_a": (3, 4, 2),
+            "shape_b": (4, 3, 2),
+            "subscripts": "ij...,ji...->i...",
+        },
+        # trace and tensordot and diagonal
+        {
+            "shape_a": (2, 3, 2, 4),
+            "shape_b": (3, 2, 2),
+            "subscripts": "ijil,jkk->kj",
+        },
+        {
+            "shape_a": (2, 4, 2, 3),
+            "shape_b": (3, 2, 4),
+            "subscripts": "i...ij,ji...->...j",
+        },
+        # broadcast
+        {
+            "shape_a": (2, 3, 4),
+            "shape_b": (3,),
+            "subscripts": "ij...,j...->ij...",
+        },
+        {
+            "shape_a": (2, 3, 4),
+            "shape_b": (3,),
+            "subscripts": "ij...,...j->ij...",
+        },
+        {"shape_a": (2, 3, 4), "shape_b": (3,), "subscripts": "ij...,j->ij..."},
+        {
+            "shape_a": (4, 3),
+            "shape_b": (3, 2),
+            "subscripts": "ik...,k...->i...",
+        },
+        {
+            "shape_a": (4, 3),
+            "shape_b": (3, 2),
+            "subscripts": "ik...,...kj->i...j",
+        },
+        {"shape_a": (4, 3), "shape_b": (3, 2), "subscripts": "...k,kj"},
+        {"shape_a": (4, 3), "shape_b": (3, 2), "subscripts": "ik,k...->i..."},
+        {"shape_a": (2, 3, 4, 5), "shape_b": (4,), "subscripts": "ijkl,k"},
+        {"shape_a": (2, 3, 4, 5), "shape_b": (4,), "subscripts": "...kl,k"},
+        {"shape_a": (2, 3, 4, 5), "shape_b": (4,), "subscripts": "...kl,k..."},
+        {
+            "shape_a": (1, 1, 1, 2, 3, 2),
+            "shape_b": (2, 3, 2, 2),
+            "subscripts": "...lmn,lmno->...o",
+        },
+    )
+)
+class TestEinSumBinaryOperation:
+    @testing.for_all_dtypes_combination(
+        ["dtype_a", "dtype_b"], no_bool=False, no_float16=False
+    )
+    @testing.numpy_cupy_allclose(
+        type_check=has_support_aspect64(), contiguous_check=False
+    )
+    def test_einsum_binary(self, xp, dtype_a, dtype_b):
+        a = testing.shaped_arange(self.shape_a, xp, dtype_a)
+        b = testing.shaped_arange(self.shape_b, xp, dtype_b)
+        # casting should be added for dpnp to allow cast int64 to float32
+        # when default float type is float32
+        casting = "safe" if has_support_aspect64() else "unsafe"
+        return xp.einsum(self.subscripts, a, b, casting=casting)
+
+
+class TestEinSumBinaryOperationWithScalar:
     @testing.for_all_dtypes()
     @testing.numpy_cupy_allclose(contiguous_check=False)
     def test_scalar_1(self, xp, dtype):
@@ -267,6 +490,99 @@ def test_scalar_2(self, xp, dtype):
         return xp.asarray(xp.einsum("i,->", a, 4))
 
 
+@testing.parameterize(
+    *augment_einsum_testcases(
+        {
+            "shape_a": (2, 3),
+            "shape_b": (3, 4),
+            "shape_c": (4, 5),
+            "subscripts": "ij,jk,kl",
+        },
+        {
+            "shape_a": (2, 4),
+            "shape_b": (2, 3),
+            "shape_c": (2,),
+            "subscripts": "ij,ik,i->ijk",
+        },
+        {
+            "shape_a": (2, 4),
+            "shape_b": (3, 2),
+            "shape_c": (2,),
+            "subscripts": "ij,ki,i->jk",
+        },
+        {
+            "shape_a": (2, 3, 4),
+            "shape_b": (2,),
+            "shape_c": (3, 4, 2),
+            "subscripts": "i...,i,...i->...i",
+        },
+        {
+            "shape_a": (2, 3, 4),
+            "shape_b": (4, 3),
+            "shape_c": (3, 3, 4),
+            "subscripts": "a...,...b,c...->abc...",
+        },
+        {
+            "shape_a": (2, 3, 4),
+            "shape_b": (3, 4),
+            "shape_c": (3, 3, 4),
+            "subscripts": "a...,...,c...->ac...",
+        },
+        {
+            "shape_a": (3, 3, 4),
+            "shape_b": (4, 3),
+            "shape_c": (2, 3, 4),
+            "subscripts": "a...,...b,c...->abc...",
+        },
+        {
+            "shape_a": (3, 3, 4),
+            "shape_b": (3, 4),
+            "shape_c": (2, 3, 4),
+            "subscripts": "a...,...,c...->ac...",
+        },
+    )
+)
+class TestEinSumTernaryOperation:
+    @testing.for_all_dtypes_combination(
+        ["dtype_a", "dtype_b", "dtype_c"], no_bool=False, no_float16=False
+    )
+    @testing.numpy_cupy_allclose(
+        type_check=has_support_aspect64(), contiguous_check=False
+    )
+    def test_einsum_ternary(self, xp, dtype_a, dtype_b, dtype_c):
+        a = testing.shaped_arange(self.shape_a, xp, dtype_a)
+        b = testing.shaped_arange(self.shape_b, xp, dtype_b)
+        c = testing.shaped_arange(self.shape_c, xp, dtype_c)
+
+        try:
+            # casting should be added for dpnp to allow cast int64 to float32
+            # when default float type is float32
+            casting = "safe" if has_support_aspect64() else "unsafe"
+            out = xp.einsum(
+                self.subscripts, a, b, c, optimize=False, casting=casting
+            )
+        except TypeError:
+            assert xp is numpy
+            out = xp.einsum(self.subscripts, a, b, c)
+
+        if xp is not numpy:  # Avoid numpy issues #11059, #11060
+            for optimize in [
+                True,  # "greedy"
+                "optimal",
+                ["einsum_path", (0, 1), (0, 1)],
+                ["einsum_path", (0, 2), (0, 1)],
+                ["einsum_path", (1, 2), (0, 1)],
+            ]:
+                # casting should be added for dpnp to allow cast int64 to float32
+                # when default float type is float32
+                casting = "safe" if has_support_aspect64() else "unsafe"
+                optimized_out = xp.einsum(
+                    self.subscripts, a, b, c, optimize=optimize, casting=casting
+                )
+                testing.assert_allclose(optimized_out, out)
+        return out
+
+
 @testing.parameterize(
     *(
         [
@@ -292,38 +608,36 @@ def test_scalar_2(self, xp, dtype):
         )
     )
 )
-class TestEinSumLarge(unittest.TestCase):
-    def setUp(self):
-        chars = "abcdefghij"
-        sizes = numpy.array([2, 3, 4, 5, 4, 3, 2, 6, 5, 4, 3])
-        size_dict = {}
-        for size, char in zip(sizes, chars):
-            size_dict[char] = size
-
-        # Builds views based off initial operands
-        string = self.subscript
-        operands = [string]
-        terms = string.split("->")[0].split(",")
-        for term in terms:
-            dims = [size_dict[x] for x in term]
-            operands.append(numpy.ones(*dims))
-
-        self.operands = operands
-
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+class TestEinSumLarge:
+    chars = "abcdefghij"
+    sizes = (2, 3, 4, 5, 4, 3, 2, 6, 5, 4, 3)
+    size_dict = {}
+    for size, char in zip(sizes, chars):
+        size_dict[char] = size
+
+    @pytest.fixture()
+    def shapes(self):
+        size_dict = self.size_dict
+        terms = self.subscript.split("->")[0].split(",")
+        return tuple([tuple([size_dict[x] for x in term]) for term in terms])
+
     @testing.numpy_cupy_allclose(
-        type_check=has_support_aspect64(), contiguous_check=False
+        rtol=2e-6, type_check=has_support_aspect64(), contiguous_check=False
     )
-    def test_einsum(self, xp):
+    def test_einsum(self, xp, shapes):
+        arrays = [
+            testing.shaped_random(shape, xp, cupy.default_float_type(), scale=1)
+            for shape in shapes
+        ]
         # TODO(kataoka): support memory efficient cupy.einsum
         with warnings.catch_warnings(record=True) as ws:
-            # I hope there's no problem with np.einsum for these cases...
-            out = xp.einsum(*self.operands, optimize=self.opt)
+            # I hope there"s no problem with np.einsum for these cases...
+            out = xp.einsum(self.subscript, *arrays, optimize=self.opt)
             if xp is not numpy and isinstance(
                 self.opt, tuple
             ):  # with memory limit
                 for w in ws:
-                    self.assertIn("memory", str(w.message))
+                    assert "memory" in str(w.message)
             else:
-                self.assertEqual(len(ws), 0)
+                assert len(ws) == 0
         return out