From dd2970d02f22eeb56eebe96f39f0dfdae17b6ccb Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Sat, 7 Oct 2023 00:52:19 -0700 Subject: [PATCH 01/19] Implements dedicated __ipow__ kernel --- dpctl/tensor/_elementwise_funcs.py | 10 +- .../kernels/elementwise_functions/pow.hpp | 251 ++++++++++++++++-- .../source/elementwise_functions.cpp | 44 +++ 3 files changed, 279 insertions(+), 26 deletions(-) diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py index 24ae7fa8cf..eb040e895d 100644 --- a/dpctl/tensor/_elementwise_funcs.py +++ b/dpctl/tensor/_elementwise_funcs.py @@ -1266,7 +1266,7 @@ ti._multiply_result_type, ti._multiply, _multiply_docstring_, - ti._multiply_inplace, + binary_inplace_fn=ti._multiply_inplace, ) # U25: ==== NEGATIVE (x) @@ -1361,7 +1361,11 @@ the returned array is determined by the Type Promotion Rules. """ pow = BinaryElementwiseFunc( - "pow", ti._pow_result_type, ti._pow, _pow_docstring_ + "pow", + ti._pow_result_type, + ti._pow, + _pow_docstring_, + binary_inplace_fn=ti._pow_inplace, ) # U??: ==== PROJ (x) @@ -1654,7 +1658,7 @@ ti._subtract_result_type, ti._subtract, _subtract_docstring_, - ti._subtract_inplace, + binary_inplace_fn=ti._subtract_inplace, ) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp index ba9241b8db..b52939148b 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp @@ -55,31 +55,30 @@ template struct PowFunctor using supports_sg_loadstore = std::negation< std::disjunction, tu_ns::is_complex>>; - using supports_vec = - std::negation, - tu_ns::is_complex, - std::is_integral, - std::is_integral>>; + using supports_vec = std::negation< + std::disjunction, tu_ns::is_complex>>; - resT operator()(argT1 in1, argT2 in2) const + resT operator()(const argT1 &in1, const argT2 &in2) const { if 
constexpr (std::is_integral_v || std::is_integral_v) { + auto tmp1 = in1; + auto tmp2 = in2; if constexpr (std::is_signed_v) { - if (in2 < 0) { + if (tmp2 < 0) { // invalid; return 0 return resT(0); } } resT res = 1; - if (in1 == 1 || in2 == 0) { + if (tmp1 == 1 || tmp2 == 0) { return res; } - while (in2 > 0) { - if (in2 & 1) { - res *= in1; + while (tmp2 > 0) { + if (tmp2 & 1) { + res *= tmp1; } - in2 >>= 1; - in1 *= in1; + tmp2 >>= 1; + tmp1 *= tmp1; } return res; } @@ -93,16 +92,48 @@ template struct PowFunctor operator()(const sycl::vec &in1, const sycl::vec &in2) const { - auto res = sycl::pow(in1, in2); - if constexpr (std::is_same_v) { + if constexpr (std::is_integral_v || std::is_integral_v) { + sycl::vec res; +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + auto tmp1 = in1[i]; + auto tmp2 = in2[i]; + if constexpr (std::is_signed_v) { + if (tmp2 < 0) { + // invalid; yield 0 + res[i] = 0; + continue; + } + } + resT res_tmp = 1; + if (tmp1 == 1 || tmp2 == 0) { + res[i] = res_tmp; + continue; + } + while (tmp2 > 0) { + if (tmp2 & 1) { + res_tmp *= tmp1; + } + tmp2 >>= 1; + tmp1 *= tmp1; + } + res[i] = res_tmp; + } return res; } else { - using dpctl::tensor::type_utils::vec_cast; + auto res = sycl::pow(in1, in2); + if constexpr (std::is_same_v) + { + return res; + } + else { + using dpctl::tensor::type_utils::vec_cast; - return vec_cast( - res); + return vec_cast(res); + } } } }; @@ -128,10 +159,6 @@ using PowStridedFunctor = IndexerT, PowFunctor>; -// TODO: when type promotion logic is better defined, -// consider implementing overloads of std::pow that take -// integers for the exponents. 
Seem to give better accuracy in -// some cases (complex data especially) template struct PowOutputType { using value_type = typename std::disjunction< // disjunction is C++17 @@ -286,6 +313,184 @@ template struct PowStridedFactory } }; +template struct PowInplaceFunctor +{ + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::negation< + std::disjunction, tu_ns::is_complex>>; + + void operator()(resT &res, const argT &in) + { + if constexpr (std::is_integral_v || std::is_integral_v) { + auto tmp1 = res; + auto tmp2 = in; + if constexpr (std::is_signed_v) { + if (tmp2 < 0) { + // invalid; return 0 + res = 0; + return; + } + } + if (tmp1 == 1) { + return; + } + if (tmp2 == 0) { + res = 1; + return; + } + resT res_tmp = 1; + while (tmp2 > 0) { + if (tmp2 & 1) { + res_tmp *= tmp1; + } + tmp2 >>= 1; + tmp1 *= tmp1; + } + res = res_tmp; + return; + } + else { + res = std::pow(res, in); + }; + } + + template + void operator()(sycl::vec &res, + const sycl::vec &in) + { + if constexpr (std::is_integral_v || std::is_integral_v) { +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + auto tmp1 = res[i]; + auto tmp2 = in[i]; + if constexpr (std::is_signed_v) { + if (tmp2 < 0) { + // invalid; return 0 + res[i] = 0; + continue; + } + } + if (tmp1 == 1) { + continue; + } + if (tmp2 == 0) { + res[i] = 1; + continue; + } + resT res_tmp = 1; + while (tmp2 > 0) { + if (tmp2 & 1) { + res_tmp *= tmp1; + } + tmp2 >>= 1; + tmp1 *= tmp1; + } + res[i] = res_tmp; + } + } + else { + res = sycl::pow(res, in); + } + } +}; + +template +using PowInplaceContigFunctor = elementwise_common::BinaryInplaceContigFunctor< + argT, + resT, + PowInplaceFunctor, + vec_sz, + n_vecs>; + +template +using PowInplaceStridedFunctor = + elementwise_common::BinaryInplaceStridedFunctor< + argT, + resT, + IndexerT, + PowInplaceFunctor>; + +template +class pow_inplace_contig_kernel; + +template +sycl::event +pow_inplace_contig_impl(sycl::queue &exec_q, 
+ size_t nelems, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_inplace_contig_impl< + argTy, resTy, PowInplaceContigFunctor, pow_inplace_contig_kernel>( + exec_q, nelems, arg_p, arg_offset, res_p, res_offset, depends); +} + +template struct PowInplaceContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = pow_inplace_contig_impl; + return fn; + } + } +}; + +template +class pow_inplace_strided_kernel; + +template +sycl::event +pow_inplace_strided_impl(sycl::queue &exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_inplace_strided_impl< + argTy, resTy, PowInplaceStridedFunctor, pow_inplace_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct PowInplaceStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = pow_inplace_strided_impl; + return fn; + } + } +}; + } // namespace pow } // namespace kernels } // namespace tensor diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp index 043cac0cd2..649e044496 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp @@ -2164,6 +2164,11 @@ static int pow_output_id_table[td_ns::num_types][td_ns::num_types]; static binary_strided_impl_fn_ptr_t pow_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_contig_impl_fn_ptr_t + 
pow_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + pow_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + void populate_pow_dispatch_tables(void) { using namespace td_ns; @@ -2187,6 +2192,20 @@ void populate_pow_dispatch_tables(void) num_types> dtb3; dtb3.populate_dispatch_table(pow_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::PowInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table(pow_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::PowInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(pow_inplace_contig_dispatch_table); }; } // namespace impl @@ -4610,6 +4629,31 @@ void init_elementwise_functions(py::module_ m) py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); m.def("_pow_result_type", pow_result_type_pyapi, ""); + + using impl::pow_inplace_contig_dispatch_table; + using impl::pow_inplace_strided_dispatch_table; + + auto pow_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, pow_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + pow_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + pow_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_pow_inplace", pow_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + 
py::arg("depends") = py::list()); } // U??: ==== PROJ (x) From 1804f1550b997730818b27c0a8a6246fa9d28946 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Sat, 7 Oct 2023 09:03:06 -0700 Subject: [PATCH 02/19] Implements in-place remainder --- dpctl/tensor/_elementwise_funcs.py | 6 +- .../elementwise_functions/remainder.hpp | 183 ++++++++++++++++++ .../source/elementwise_functions.cpp | 45 +++++ 3 files changed, 233 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py index eb040e895d..36a21b3138 100644 --- a/dpctl/tensor/_elementwise_funcs.py +++ b/dpctl/tensor/_elementwise_funcs.py @@ -1447,7 +1447,11 @@ the returned array is determined by the Type Promotion Rules. """ remainder = BinaryElementwiseFunc( - "remainder", ti._remainder_result_type, ti._remainder, _remainder_docstring_ + "remainder", + ti._remainder_result_type, + ti._remainder, + _remainder_docstring_, + binary_inplace_fn=ti._remainder_inplace, ) # U28: ==== ROUND (x) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp index 6cd306a900..7ffb5147c2 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp @@ -313,6 +313,189 @@ template struct RemainderStridedFactory } }; +template struct RemainderInplaceFunctor +{ + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::negation< + std::disjunction, tu_ns::is_complex>>; + + void operator()(resT &res, const argT &in) + { + if constexpr (std::is_integral_v || std::is_integral_v) { + if (in == argT(0)) { + res = 0; + return; + } + if constexpr (std::is_signed_v || std::is_signed_v) { + auto tmp = res; + res %= in; + if (res != 0 && l_xor(tmp < 0, in < 0)) { + res += in; + } + } + else { + res %= in; + } + } + 
else { + res = sycl::fmod(res, in); + if (res) { + if (l_xor(in < 0, res < 0)) { + res += in; + } + } + else { + res = std::copysign(0, in); + } + } + } + + template + void operator()(sycl::vec &res, + const sycl::vec &in) + { + if constexpr (std::is_integral_v || std::is_integral_v) { +#pragma unroll + for (auto i = 0; i < vec_sz; ++i) { + if (in[i] == argT(0)) { + res[i] = 0; + } + else { + auto rem = res[i] % in[i]; + if constexpr (std::is_signed_v || + std::is_signed_v) { + if (rem != 0 && l_xor(res[i] < 0, in[i] < 0)) { + rem += in[i]; + } + } + res[i] = rem; + } + } + } + else { + res = sycl::fmod(res, in); +#pragma unroll + for (auto i = 0; i < vec_sz; ++i) { + if (res[i]) { + if (l_xor(in[i] < 0, res[i] < 0)) { + res[i] += in[i]; + } + } + else { + res[i] = std::copysign(0, in[i]); + } + } + } + } + +private: + bool l_xor(bool b1, bool b2) const + { + return (b1 != b2); + } +}; + +template +using RemainderInplaceContigFunctor = + elementwise_common::BinaryInplaceContigFunctor< + argT, + resT, + RemainderInplaceFunctor, + vec_sz, + n_vecs>; + +template +using RemainderInplaceStridedFunctor = + elementwise_common::BinaryInplaceStridedFunctor< + argT, + resT, + IndexerT, + RemainderInplaceFunctor>; + +template +class remainder_inplace_contig_kernel; + +template +sycl::event +remainder_inplace_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_inplace_contig_impl< + argTy, resTy, RemainderInplaceContigFunctor, + remainder_inplace_contig_kernel>(exec_q, nelems, arg_p, arg_offset, + res_p, res_offset, depends); +} + +template +struct RemainderInplaceContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = remainder_inplace_contig_impl; + return fn; + } + } +}; + +template +class remainder_inplace_strided_kernel; + 
+template +sycl::event remainder_inplace_strided_impl( + sycl::queue &exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_inplace_strided_impl< + argTy, resTy, RemainderInplaceStridedFunctor, + remainder_inplace_strided_kernel>(exec_q, nelems, nd, shape_and_strides, + arg_p, arg_offset, res_p, res_offset, + depends, additional_depends); +} + +template +struct RemainderInplaceStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = remainder_inplace_strided_impl; + return fn; + } + } +}; + } // namespace remainder } // namespace kernels } // namespace tensor diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp index 649e044496..7100a5f54e 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp @@ -2291,6 +2291,12 @@ static int remainder_output_id_table[td_ns::num_types][td_ns::num_types]; static binary_strided_impl_fn_ptr_t remainder_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_contig_impl_fn_ptr_t + remainder_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + remainder_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + + void populate_remainder_dispatch_tables(void) { using namespace td_ns; @@ -2314,6 +2320,20 @@ void populate_remainder_dispatch_tables(void) num_types> dtb3; dtb3.populate_dispatch_table(remainder_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::RemainderInplaceStridedFactory; + DispatchTableBuilder + dtb4; + 
dtb4.populate_dispatch_table(remainder_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::RemainderInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(remainder_inplace_contig_dispatch_table); } } // namespace impl @@ -4741,6 +4761,31 @@ void init_elementwise_functions(py::module_ m) py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); m.def("_remainder_result_type", remainder_result_type_pyapi, ""); + + using impl::remainder_inplace_contig_dispatch_table; + using impl::remainder_inplace_strided_dispatch_table; + + auto remainder_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, remainder_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + remainder_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + remainder_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_remainder_inplace", remainder_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); } // U28: ==== ROUND (x) From a34b322618a7905192c1dd53d1bbe87366942372 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Thu, 19 Oct 2023 01:16:23 -0700 Subject: [PATCH 03/19] Implements in-place bitwise_and and bitwise_or --- dpctl/tensor/_elementwise_funcs.py | 2 + .../elementwise_functions/bitwise_and.hpp | 138 ++++++++++++++++++ .../elementwise_functions/bitwise_or.hpp | 138 ++++++++++++++++++ 
.../source/elementwise_functions.cpp | 96 +++++++++++- 4 files changed, 372 insertions(+), 2 deletions(-) diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py index 36a21b3138..ac91bc724a 100644 --- a/dpctl/tensor/_elementwise_funcs.py +++ b/dpctl/tensor/_elementwise_funcs.py @@ -297,6 +297,7 @@ ti._bitwise_and_result_type, ti._bitwise_and, _bitwise_and_docstring_, + binary_inplace_fn=ti._bitwise_and_inplace, ) # B04: ===== BITWISE_LEFT_SHIFT (x1, x2) @@ -393,6 +394,7 @@ ti._bitwise_or_result_type, ti._bitwise_or, _bitwise_or_docstring_, + binary_inplace_fn=ti._bitwise_or_inplace, ) # B06: ===== BITWISE_RIGHT_SHIFT (x1, x2) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp index 016b3a05d3..c928200e3c 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp @@ -257,6 +257,144 @@ struct BitwiseAndStridedFactory } }; +template struct BitwiseAndInplaceFunctor +{ + using supports_sg_loadstore = typename std::true_type; + using supports_vec = typename std::true_type; + + void operator()(resT &res, const argT &in) const + { + using tu_ns::convert_impl; + + if constexpr (std::is_same_v) { + res = res && in; + } + else { + res &= in; + } + } + + template + void operator()(sycl::vec &res, + const sycl::vec &in) const + { + + if constexpr (std::is_same_v) { + using dpctl::tensor::type_utils::vec_cast; + + auto tmp = (res && in); + res = vec_cast( + tmp); + } + else { + res &= in; + } + } +}; + +template +using BitwiseAndInplaceContigFunctor = + elementwise_common::BinaryInplaceContigFunctor< + argT, + resT, + BitwiseAndInplaceFunctor, + vec_sz, + n_vecs>; + +template +using BitwiseAndInplaceStridedFunctor = + elementwise_common::BinaryInplaceStridedFunctor< + argT, + resT, + IndexerT, + BitwiseAndInplaceFunctor>; 
+ +template +class bitwise_and_inplace_contig_kernel; + +template +sycl::event +bitwise_and_inplace_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_inplace_contig_impl< + argTy, resTy, BitwiseAndInplaceContigFunctor, + bitwise_and_inplace_contig_kernel>(exec_q, nelems, arg_p, arg_offset, + res_p, res_offset, depends); +} + +template +struct BitwiseAndInplaceContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename BitwiseAndOutputType::value_type, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = bitwise_and_inplace_contig_impl; + return fn; + } + } +}; + +template +class bitwise_and_inplace_strided_kernel; + +template +sycl::event bitwise_and_inplace_strided_impl( + sycl::queue &exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_inplace_strided_impl< + argTy, resTy, BitwiseAndInplaceStridedFunctor, + bitwise_and_inplace_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct BitwiseAndInplaceStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename BitwiseAndOutputType::value_type, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = bitwise_and_inplace_strided_impl; + return fn; + } + } +}; + } // namespace bitwise_and } // namespace kernels } // namespace tensor diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_or.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_or.hpp index 65f25dd296..3984f9b94e 100644 --- 
a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_or.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_or.hpp @@ -253,6 +253,144 @@ template struct BitwiseOrStridedFactory } }; +template struct BitwiseOrInplaceFunctor +{ + using supports_sg_loadstore = typename std::true_type; + using supports_vec = typename std::true_type; + + void operator()(resT &res, const argT &in) const + { + using tu_ns::convert_impl; + + if constexpr (std::is_same_v) { + res = res || in; + } + else { + res |= in; + } + } + + template + void operator()(sycl::vec &res, + const sycl::vec &in) const + { + + if constexpr (std::is_same_v) { + using dpctl::tensor::type_utils::vec_cast; + + auto tmp = (res || in); + res = vec_cast( + tmp); + } + else { + res |= in; + } + } +}; + +template +using BitwiseOrInplaceContigFunctor = + elementwise_common::BinaryInplaceContigFunctor< + argT, + resT, + BitwiseOrInplaceFunctor, + vec_sz, + n_vecs>; + +template +using BitwiseOrInplaceStridedFunctor = + elementwise_common::BinaryInplaceStridedFunctor< + argT, + resT, + IndexerT, + BitwiseOrInplaceFunctor>; + +template +class bitwise_or_inplace_contig_kernel; + +template +sycl::event +bitwise_or_inplace_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_inplace_contig_impl< + argTy, resTy, BitwiseOrInplaceContigFunctor, + bitwise_or_inplace_contig_kernel>(exec_q, nelems, arg_p, arg_offset, + res_p, res_offset, depends); +} + +template +struct BitwiseOrInplaceContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename BitwiseOrOutputType::value_type, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = bitwise_or_inplace_contig_impl; + return fn; + } + } +}; + +template +class bitwise_or_inplace_strided_kernel; + +template +sycl::event bitwise_or_inplace_strided_impl( + 
sycl::queue &exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_inplace_strided_impl< + argTy, resTy, BitwiseOrInplaceStridedFunctor, + bitwise_or_inplace_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct BitwiseOrInplaceStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename BitwiseOrOutputType::value_type, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = bitwise_or_inplace_strided_impl; + return fn; + } + } +}; + } // namespace bitwise_or } // namespace kernels } // namespace tensor diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp index 7100a5f54e..933feee29a 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp @@ -566,6 +566,13 @@ static int bitwise_and_output_id_table[td_ns::num_types][td_ns::num_types]; static binary_strided_impl_fn_ptr_t bitwise_and_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_contig_impl_fn_ptr_t + bitwise_and_inplace_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + bitwise_and_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + void populate_bitwise_and_dispatch_tables(void) { using namespace td_ns; @@ -589,6 +596,20 @@ void populate_bitwise_and_dispatch_tables(void) num_types> dtb3; dtb3.populate_dispatch_table(bitwise_and_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::BitwiseAndInplaceStridedFactory; + DispatchTableBuilder + dtb4; + 
dtb4.populate_dispatch_table(bitwise_and_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::BitwiseAndInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(bitwise_and_inplace_contig_dispatch_table); }; } // namespace impl @@ -683,6 +704,13 @@ static int bitwise_or_output_id_table[td_ns::num_types][td_ns::num_types]; static binary_strided_impl_fn_ptr_t bitwise_or_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_contig_impl_fn_ptr_t + bitwise_or_inplace_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + bitwise_or_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + void populate_bitwise_or_dispatch_tables(void) { using namespace td_ns; @@ -706,6 +734,20 @@ void populate_bitwise_or_dispatch_tables(void) num_types> dtb3; dtb3.populate_dispatch_table(bitwise_or_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::BitwiseOrInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table(bitwise_or_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::BitwiseOrInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(bitwise_or_inplace_contig_dispatch_table); }; } // namespace impl @@ -2294,8 +2336,8 @@ static binary_strided_impl_fn_ptr_t static binary_inplace_contig_impl_fn_ptr_t remainder_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; static binary_inplace_strided_impl_fn_ptr_t - remainder_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - + remainder_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; void populate_remainder_dispatch_tables(void) { @@ -3292,6 +3334,31 @@ void init_elementwise_functions(py::module_ m) 
py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); m.def("_bitwise_and_result_type", bitwise_and_result_type_pyapi, ""); + + using impl::bitwise_and_inplace_contig_dispatch_table; + using impl::bitwise_and_inplace_strided_dispatch_table; + + auto bitwise_and_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, bitwise_and_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + bitwise_and_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + bitwise_and_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_bitwise_and_inplace", bitwise_and_inplace_pyapi, "", + py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); } // B04: ===== BITWISE_LEFT_SHIFT (x1, x2) @@ -3405,6 +3472,31 @@ void init_elementwise_functions(py::module_ m) py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); m.def("_bitwise_or_result_type", bitwise_or_result_type_pyapi, ""); + + using impl::bitwise_or_inplace_contig_dispatch_table; + using impl::bitwise_or_inplace_strided_dispatch_table; + + auto bitwise_or_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, bitwise_or_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + 
bitwise_or_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + bitwise_or_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_bitwise_or_inplace", bitwise_or_inplace_pyapi, "", + py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); } // B06: ===== BITWISE_RIGHT_SHIFT (x1, x2) From 13a4e90301c5bbbddf4d9f6c1c5b873fa236d180 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Thu, 19 Oct 2023 01:16:59 -0700 Subject: [PATCH 04/19] Implements in-place bitwise_xor --- dpctl/tensor/_elementwise_funcs.py | 1 + .../elementwise_functions/bitwise_xor.hpp | 138 ++++++++++++++++++ .../source/elementwise_functions.cpp | 46 ++++++ 3 files changed, 185 insertions(+) diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py index ac91bc724a..46b6bc945d 100644 --- a/dpctl/tensor/_elementwise_funcs.py +++ b/dpctl/tensor/_elementwise_funcs.py @@ -461,6 +461,7 @@ ti._bitwise_xor_result_type, ti._bitwise_xor, _bitwise_xor_docstring_, + binary_inplace_fn=ti._bitwise_xor_inplace, ) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_xor.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_xor.hpp index 2b0ab09dca..decb2ddfd0 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_xor.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_xor.hpp @@ -257,6 +257,144 @@ struct BitwiseXorStridedFactory } }; +template struct BitwiseXorInplaceFunctor +{ + using supports_sg_loadstore = typename std::true_type; + using supports_vec = typename std::true_type; + + void operator()(resT &res, const argT &in) const + { + using tu_ns::convert_impl; + + if 
constexpr (std::is_same_v) { + res = (res != in); + } + else { + res ^= in; + } + } + + template + void operator()(sycl::vec &res, + const sycl::vec &in) const + { + + if constexpr (std::is_same_v) { + using dpctl::tensor::type_utils::vec_cast; + + auto tmp = (res != in); + res = vec_cast( + tmp); + } + else { + res ^= in; + } + } +}; + +template +using BitwiseXorInplaceContigFunctor = + elementwise_common::BinaryInplaceContigFunctor< + argT, + resT, + BitwiseXorInplaceFunctor, + vec_sz, + n_vecs>; + +template +using BitwiseXorInplaceStridedFunctor = + elementwise_common::BinaryInplaceStridedFunctor< + argT, + resT, + IndexerT, + BitwiseXorInplaceFunctor>; + +template +class bitwise_xor_inplace_contig_kernel; + +template +sycl::event +bitwise_xor_inplace_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_inplace_contig_impl< + argTy, resTy, BitwiseXorInplaceContigFunctor, + bitwise_xor_inplace_contig_kernel>(exec_q, nelems, arg_p, arg_offset, + res_p, res_offset, depends); +} + +template +struct BitwiseXorInplaceContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename BitwiseXorOutputType::value_type, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = bitwise_xor_inplace_contig_impl; + return fn; + } + } +}; + +template +class bitwise_xor_inplace_strided_kernel; + +template +sycl::event bitwise_xor_inplace_strided_impl( + sycl::queue &exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_inplace_strided_impl< + argTy, resTy, BitwiseXorInplaceStridedFunctor, + bitwise_xor_inplace_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, 
res_p, + res_offset, depends, additional_depends); +} + +template +struct BitwiseXorInplaceStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename BitwiseXorOutputType::value_type, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = bitwise_xor_inplace_strided_impl; + return fn; + } + } +}; + } // namespace bitwise_xor } // namespace kernels } // namespace tensor diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp index 933feee29a..75957f2619 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp @@ -806,6 +806,13 @@ static int bitwise_xor_output_id_table[td_ns::num_types][td_ns::num_types]; static binary_strided_impl_fn_ptr_t bitwise_xor_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_contig_impl_fn_ptr_t + bitwise_xor_inplace_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + bitwise_xor_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + void populate_bitwise_xor_dispatch_tables(void) { using namespace td_ns; @@ -829,6 +836,20 @@ void populate_bitwise_xor_dispatch_tables(void) num_types> dtb3; dtb3.populate_dispatch_table(bitwise_xor_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::BitwiseXorInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table(bitwise_xor_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::BitwiseXorInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(bitwise_xor_inplace_contig_dispatch_table); }; } // namespace impl @@ -3584,6 +3605,31 @@ void init_elementwise_functions(py::module_ m) py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), 
py::arg("depends") = py::list()); m.def("_bitwise_xor_result_type", bitwise_xor_result_type_pyapi, ""); + + using impl::bitwise_xor_inplace_contig_dispatch_table; + using impl::bitwise_xor_inplace_strided_dispatch_table; + + auto bitwise_xor_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, bitwise_xor_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + bitwise_xor_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + bitwise_xor_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_bitwise_xor_inplace", bitwise_xor_inplace_pyapi, "", + py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); } // U09: ==== CEIL (x) From 0af493f42bab86ae00d20e6ed12d1d61e501ec10 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Thu, 19 Oct 2023 01:17:55 -0700 Subject: [PATCH 05/19] Implements in-place bitwise_left_shift and bitwise_right_shift --- dpctl/tensor/_elementwise_funcs.py | 2 + .../bitwise_left_shift.hpp | 144 +++++++++++++++++ .../bitwise_right_shift.hpp | 146 ++++++++++++++++++ .../source/elementwise_functions.cpp | 98 ++++++++++++ 4 files changed, 390 insertions(+) diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py index 46b6bc945d..906f37ee10 100644 --- a/dpctl/tensor/_elementwise_funcs.py +++ b/dpctl/tensor/_elementwise_funcs.py @@ -331,6 +331,7 @@ ti._bitwise_left_shift_result_type, ti._bitwise_left_shift, _bitwise_left_shift_docstring_, + 
binary_inplace_fn=ti._bitwise_left_shift_inplace, ) @@ -427,6 +428,7 @@ ti._bitwise_right_shift_result_type, ti._bitwise_right_shift, _bitwise_right_shift_docstring_, + binary_inplace_fn=ti._bitwise_right_shift_inplace, ) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_left_shift.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_left_shift.hpp index 4ae04f97de..8fce912e68 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_left_shift.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_left_shift.hpp @@ -268,6 +268,150 @@ struct BitwiseLeftShiftStridedFactory } }; +template struct BitwiseLeftShiftInplaceFunctor +{ + static_assert(std::is_integral_v); + static_assert(!std::is_same_v); + + using supports_sg_loadstore = typename std::true_type; + using supports_vec = typename std::true_type; + + void operator()(resT &res, const argT &in) const + { + impl(res, in); + } + + template + void operator()(sycl::vec &res, + const sycl::vec &in) const + { +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + impl(res[i], in[i]); + } + } + +private: + void impl(resT &res, const argT &in) const + { + constexpr argT res_bitsize = static_cast(sizeof(resT) * 8); + constexpr resT zero = resT(0); + + // bitshift op with second operand negative, or >= bitwidth(argT1) is UB + // array API spec mandates 0 + if constexpr (std::is_unsigned_v) { + (in < res_bitsize) ? (res <<= in) : res = zero; + } + else { + (in < argT(0)) ? res = zero + : ((in < res_bitsize) ? 
(res <<= in) : res = zero); + } + } +}; + +template +using BitwiseLeftShiftInplaceContigFunctor = + elementwise_common::BinaryInplaceContigFunctor< + argT, + resT, + BitwiseLeftShiftInplaceFunctor, + vec_sz, + n_vecs>; + +template +using BitwiseLeftShiftInplaceStridedFunctor = + elementwise_common::BinaryInplaceStridedFunctor< + argT, + resT, + IndexerT, + BitwiseLeftShiftInplaceFunctor>; + +template +class bitwise_left_shift_inplace_contig_kernel; + +template +sycl::event bitwise_left_shift_inplace_contig_impl( + sycl::queue &exec_q, + size_t nelems, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_inplace_contig_impl< + argTy, resTy, BitwiseLeftShiftInplaceContigFunctor, + bitwise_left_shift_inplace_contig_kernel>( + exec_q, nelems, arg_p, arg_offset, res_p, res_offset, depends); +} + +template +struct BitwiseLeftShiftInplaceContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = bitwise_left_shift_inplace_contig_impl; + return fn; + } + } +}; + +template +class bitwise_left_shift_inplace_strided_kernel; + +template +sycl::event bitwise_left_shift_inplace_strided_impl( + sycl::queue &exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_inplace_strided_impl< + argTy, resTy, BitwiseLeftShiftInplaceStridedFunctor, + bitwise_left_shift_inplace_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct BitwiseLeftShiftInplaceStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + 
fnT fn = bitwise_left_shift_inplace_strided_impl; + return fn; + } + } +}; + } // namespace bitwise_left_shift } // namespace kernels } // namespace tensor diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_right_shift.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_right_shift.hpp index 9442d4f6b7..1607ced360 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_right_shift.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_right_shift.hpp @@ -270,6 +270,152 @@ struct BitwiseRightShiftStridedFactory } }; +template struct BitwiseRightShiftInplaceFunctor +{ + static_assert(std::is_integral_v); + static_assert(!std::is_same_v); + + using supports_sg_loadstore = typename std::true_type; + using supports_vec = typename std::true_type; + + void operator()(resT &res, const argT &in) const + { + impl(res, in); + } + + template + void operator()(sycl::vec &res, + const sycl::vec &in) const + { +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + impl(res[i], in[i]); + } + } + +private: + void impl(resT &res, const argT &in) const + { + constexpr argT res_bitsize = static_cast(sizeof(resT) * 8); + constexpr resT zero = resT(0); + + // bitshift op with second operand negative, or >= bitwidth(argT1) is UB + // array API spec mandates 0 + if constexpr (std::is_unsigned_v) { + (in < res_bitsize) ? (res >>= in) : res = zero; + } + else { + (in < argT(0)) ? res = zero + : ((in < res_bitsize) ? (res >>= in) + : (res < resT(0)) ? 
res = resT(-1) + : res = zero); + } + } +}; + +template +using BitwiseRightShiftInplaceContigFunctor = + elementwise_common::BinaryInplaceContigFunctor< + argT, + resT, + BitwiseRightShiftInplaceFunctor, + vec_sz, + n_vecs>; + +template +using BitwiseRightShiftInplaceStridedFunctor = + elementwise_common::BinaryInplaceStridedFunctor< + argT, + resT, + IndexerT, + BitwiseRightShiftInplaceFunctor>; + +template +class bitwise_right_shift_inplace_contig_kernel; + +template +sycl::event bitwise_right_shift_inplace_contig_impl( + sycl::queue &exec_q, + size_t nelems, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_inplace_contig_impl< + argTy, resTy, BitwiseRightShiftInplaceContigFunctor, + bitwise_right_shift_inplace_contig_kernel>( + exec_q, nelems, arg_p, arg_offset, res_p, res_offset, depends); +} + +template +struct BitwiseRightShiftInplaceContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = bitwise_right_shift_inplace_contig_impl; + return fn; + } + } +}; + +template +class bitwise_right_shift_inplace_strided_kernel; + +template +sycl::event bitwise_right_shift_inplace_strided_impl( + sycl::queue &exec_q, + size_t nelems, + int nd, + const py::ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_inplace_strided_impl< + argTy, resTy, BitwiseRightShiftInplaceStridedFunctor, + bitwise_right_shift_inplace_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct BitwiseRightShiftInplaceStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) + { + fnT fn = nullptr; + return 
fn; + } + else { + fnT fn = bitwise_right_shift_inplace_strided_impl; + return fn; + } + } +}; + } // namespace bitwise_right_shift } // namespace kernels } // namespace tensor diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp index 75957f2619..c42eea133f 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp @@ -629,6 +629,13 @@ static binary_strided_impl_fn_ptr_t bitwise_left_shift_strided_dispatch_table[td_ns::num_types] [td_ns::num_types]; +static binary_inplace_contig_impl_fn_ptr_t + bitwise_left_shift_inplace_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + bitwise_left_shift_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + void populate_bitwise_left_shift_dispatch_tables(void) { using namespace td_ns; @@ -652,6 +659,22 @@ void populate_bitwise_left_shift_dispatch_tables(void) BitwiseLeftShiftContigFactory, num_types> dtb3; dtb3.populate_dispatch_table(bitwise_left_shift_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::BitwiseLeftShiftInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table( + bitwise_left_shift_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::BitwiseLeftShiftInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table( + bitwise_left_shift_inplace_contig_dispatch_table); }; } // namespace impl @@ -767,6 +790,13 @@ static binary_strided_impl_fn_ptr_t bitwise_right_shift_strided_dispatch_table[td_ns::num_types] [td_ns::num_types]; +static binary_inplace_contig_impl_fn_ptr_t + bitwise_right_shift_inplace_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + 
bitwise_right_shift_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + void populate_bitwise_right_shift_dispatch_tables(void) { using namespace td_ns; @@ -790,6 +820,22 @@ void populate_bitwise_right_shift_dispatch_tables(void) BitwiseRightShiftContigFactory, num_types> dtb3; dtb3.populate_dispatch_table(bitwise_right_shift_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::BitwiseRightShiftInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table( + bitwise_right_shift_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::BitwiseRightShiftInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table( + bitwise_right_shift_inplace_contig_dispatch_table); }; } // namespace impl @@ -3426,6 +3472,32 @@ void init_elementwise_functions(py::module_ m) py::arg("sycl_queue"), py::arg("depends") = py::list()); m.def("_bitwise_left_shift_result_type", bitwise_left_shift_result_type_pyapi, ""); + + using impl::bitwise_left_shift_inplace_contig_dispatch_table; + using impl::bitwise_left_shift_inplace_strided_dispatch_table; + + auto bitwise_left_shift_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, + bitwise_left_shift_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + bitwise_left_shift_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + bitwise_left_shift_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + 
binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_bitwise_left_shift_inplace", bitwise_left_shift_inplace_pyapi, + "", py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); } // U08: ===== BITWISE_INVERT (x) @@ -3564,6 +3636,32 @@ void init_elementwise_functions(py::module_ m) py::arg("sycl_queue"), py::arg("depends") = py::list()); m.def("_bitwise_right_shift_result_type", bitwise_right_shift_result_type_pyapi, ""); + + using impl::bitwise_right_shift_inplace_contig_dispatch_table; + using impl::bitwise_right_shift_inplace_strided_dispatch_table; + + auto bitwise_right_shift_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, + bitwise_right_shift_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + bitwise_right_shift_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + bitwise_right_shift_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_bitwise_right_shift_inplace", bitwise_right_shift_inplace_pyapi, + "", py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); } // B07: ===== BITWISE_XOR (x1, x2) From f2b335d00a4567e929a7a19e6c307f803c272dd8 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Thu, 19 Oct 2023 09:45:37 -0700 Subject: [PATCH 06/19] Adds tests for in-place bitwise elementwise funcs --- dpctl/tests/elementwise/test_bitwise_and.py | 56 +++++++++++++++++++ .../elementwise/test_bitwise_left_shift.py | 52 +++++++++++++++++ 
dpctl/tests/elementwise/test_bitwise_or.py | 56 +++++++++++++++++++ .../elementwise/test_bitwise_right_shift.py | 52 +++++++++++++++++ dpctl/tests/elementwise/test_bitwise_xor.py | 56 +++++++++++++++++++ 5 files changed, 272 insertions(+) diff --git a/dpctl/tests/elementwise/test_bitwise_and.py b/dpctl/tests/elementwise/test_bitwise_and.py index b3a5bd665b..824e319709 100644 --- a/dpctl/tests/elementwise/test_bitwise_and.py +++ b/dpctl/tests/elementwise/test_bitwise_and.py @@ -18,6 +18,7 @@ import pytest import dpctl.tensor as dpt +from dpctl.tensor._type_utils import _can_cast from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported from .utils import _integral_dtypes @@ -85,3 +86,58 @@ def test_bitwise_and_bool(): r_lo = dpt.logical_and(x1[:, dpt.newaxis], x2[dpt.newaxis]) assert dpt.all(dpt.equal(r_bw, r_lo)) + + +@pytest.mark.parametrize("dtype", ["?"] + _integral_dtypes) +def test_bitwise_and_inplace_python_scalar(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + X = dpt.zeros((10, 10), dtype=dtype, sycl_queue=q) + dt_kind = X.dtype.kind + if dt_kind == "b": + X &= False + else: + X &= int(0) + + +@pytest.mark.parametrize("op1_dtype", ["?"] + _integral_dtypes) +@pytest.mark.parametrize("op2_dtype", ["?"] + _integral_dtypes) +def test_bitwise_and_inplace_dtype_matrix(op1_dtype, op2_dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(op1_dtype, q) + skip_if_dtype_not_supported(op2_dtype, q) + + sz = 127 + ar1 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype, sycl_queue=q) + + dev = q.sycl_device + _fp16 = dev.has_aspect_fp16 + _fp64 = dev.has_aspect_fp64 + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + ar1 &= ar2 + assert dpt.all(ar1 == 1) + + ar3 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q)[::-1] + ar4 = dpt.ones(2 * sz, dtype=op2_dtype, sycl_queue=q)[::2] + ar3 &= ar4 + assert dpt.all(ar3 == 1) + else: + with pytest.raises(TypeError): + ar1 &= ar2 + 
dpt.bitwise_and(ar1, ar2, out=ar1) + + # out is second arg + ar1 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype, sycl_queue=q) + if _can_cast(ar1.dtype, ar2.dtype, _fp16, _fp64): + dpt.bitwise_and(ar1, ar2, out=ar2) + assert dpt.all(ar2 == 1) + + ar3 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q)[::-1] + ar4 = dpt.ones(2 * sz, dtype=op2_dtype, sycl_queue=q)[::2] + dpt.bitwise_and(ar3, ar4, out=ar4) + dpt.all(ar4 == 1) + else: + with pytest.raises(TypeError): + dpt.bitwise_and(ar1, ar2, out=ar2) diff --git a/dpctl/tests/elementwise/test_bitwise_left_shift.py b/dpctl/tests/elementwise/test_bitwise_left_shift.py index cee1019353..06684ac13b 100644 --- a/dpctl/tests/elementwise/test_bitwise_left_shift.py +++ b/dpctl/tests/elementwise/test_bitwise_left_shift.py @@ -18,6 +18,7 @@ import pytest import dpctl.tensor as dpt +from dpctl.tensor._type_utils import _can_cast from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported from .utils import _integral_dtypes @@ -97,3 +98,54 @@ def test_bitwise_left_shift_range(op_dtype): z = dpt.bitwise_left_shift(x, y) assert dpt.all(dpt.equal(z, 0)) + + +@pytest.mark.parametrize("dtype", _integral_dtypes) +def test_bitwise_left_shift_inplace_python_scalar(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + X = dpt.zeros((10, 10), dtype=dtype, sycl_queue=q) + X <<= int(0) + + +@pytest.mark.parametrize("op1_dtype", _integral_dtypes) +@pytest.mark.parametrize("op2_dtype", _integral_dtypes) +def test_bitwise_left_shift_inplace_dtype_matrix(op1_dtype, op2_dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(op1_dtype, q) + skip_if_dtype_not_supported(op2_dtype, q) + + sz = 127 + ar1 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype, sycl_queue=q) + + dev = q.sycl_device + _fp16 = dev.has_aspect_fp16 + _fp64 = dev.has_aspect_fp64 + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + ar1 <<= ar2 + assert 
dpt.all(ar1 == 2) + + ar3 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q)[::-1] + ar4 = dpt.ones(2 * sz, dtype=op2_dtype, sycl_queue=q)[::2] + ar3 <<= ar4 + assert dpt.all(ar3 == 2) + else: + with pytest.raises(TypeError): + ar1 <<= ar2 + dpt.bitwise_left_shift(ar1, ar2, out=ar1) + + # out is second arg + ar1 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype, sycl_queue=q) + if _can_cast(ar1.dtype, ar2.dtype, _fp16, _fp64): + dpt.bitwise_left_shift(ar1, ar2, out=ar2) + assert dpt.all(ar2 == 2) + + ar3 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q)[::-1] + ar4 = dpt.ones(2 * sz, dtype=op2_dtype, sycl_queue=q)[::2] + dpt.bitwise_left_shift(ar3, ar4, out=ar4) + dpt.all(ar4 == 2) + else: + with pytest.raises(TypeError): + dpt.bitwise_left_shift(ar1, ar2, out=ar2) diff --git a/dpctl/tests/elementwise/test_bitwise_or.py b/dpctl/tests/elementwise/test_bitwise_or.py index d273bd1507..49949cb795 100644 --- a/dpctl/tests/elementwise/test_bitwise_or.py +++ b/dpctl/tests/elementwise/test_bitwise_or.py @@ -18,6 +18,7 @@ import pytest import dpctl.tensor as dpt +from dpctl.tensor._type_utils import _can_cast from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported from .utils import _integral_dtypes @@ -85,3 +86,58 @@ def test_bitwise_or_bool(): r_lo = dpt.logical_or(x1[:, dpt.newaxis], x2[dpt.newaxis]) assert dpt.all(dpt.equal(r_bw, r_lo)) + + +@pytest.mark.parametrize("dtype", ["?"] + _integral_dtypes) +def test_bitwise_or_inplace_python_scalar(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + X = dpt.zeros((10, 10), dtype=dtype, sycl_queue=q) + dt_kind = X.dtype.kind + if dt_kind == "b": + X |= False + else: + X |= int(0) + + +@pytest.mark.parametrize("op1_dtype", ["?"] + _integral_dtypes) +@pytest.mark.parametrize("op2_dtype", ["?"] + _integral_dtypes) +def test_bitwise_or_inplace_dtype_matrix(op1_dtype, op2_dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(op1_dtype, q) + 
skip_if_dtype_not_supported(op2_dtype, q) + + sz = 127 + ar1 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype, sycl_queue=q) + + dev = q.sycl_device + _fp16 = dev.has_aspect_fp16 + _fp64 = dev.has_aspect_fp64 + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + ar1 |= ar2 + assert dpt.all(ar1 == 1) + + ar3 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q)[::-1] + ar4 = dpt.ones(2 * sz, dtype=op2_dtype, sycl_queue=q)[::2] + ar3 |= ar4 + assert dpt.all(ar3 == 1) + else: + with pytest.raises(TypeError): + ar1 |= ar2 + dpt.bitwise_or(ar1, ar2, out=ar1) + + # out is second arg + ar1 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype, sycl_queue=q) + if _can_cast(ar1.dtype, ar2.dtype, _fp16, _fp64): + dpt.bitwise_or(ar1, ar2, out=ar2) + assert dpt.all(ar2 == 1) + + ar3 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q)[::-1] + ar4 = dpt.ones(2 * sz, dtype=op2_dtype, sycl_queue=q)[::2] + dpt.bitwise_or(ar3, ar4, out=ar4) + dpt.all(ar4 == 1) + else: + with pytest.raises(TypeError): + dpt.bitwise_or(ar1, ar2, out=ar2) diff --git a/dpctl/tests/elementwise/test_bitwise_right_shift.py b/dpctl/tests/elementwise/test_bitwise_right_shift.py index ceadb9414d..37112133db 100644 --- a/dpctl/tests/elementwise/test_bitwise_right_shift.py +++ b/dpctl/tests/elementwise/test_bitwise_right_shift.py @@ -18,6 +18,7 @@ import pytest import dpctl.tensor as dpt +from dpctl.tensor._type_utils import _can_cast from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported from .utils import _integral_dtypes @@ -97,3 +98,54 @@ def test_bitwise_right_shift_range(op_dtype): z = dpt.bitwise_right_shift(x, y) assert dpt.all(dpt.equal(z, 0)) + + +@pytest.mark.parametrize("dtype", _integral_dtypes) +def test_bitwise_right_shift_inplace_python_scalar(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + X = dpt.zeros((10, 10), dtype=dtype, sycl_queue=q) + X >>= int(0) + + 
+@pytest.mark.parametrize("op1_dtype", _integral_dtypes) +@pytest.mark.parametrize("op2_dtype", _integral_dtypes) +def test_bitwise_right_shift_inplace_dtype_matrix(op1_dtype, op2_dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(op1_dtype, q) + skip_if_dtype_not_supported(op2_dtype, q) + + sz = 127 + ar1 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype, sycl_queue=q) + + dev = q.sycl_device + _fp16 = dev.has_aspect_fp16 + _fp64 = dev.has_aspect_fp64 + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + ar1 >>= ar2 + assert dpt.all(ar1 == 0) + + ar3 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q)[::-1] + ar4 = dpt.ones(2 * sz, dtype=op2_dtype, sycl_queue=q)[::2] + ar3 >>= ar4 + assert dpt.all(ar3 == 0) + else: + with pytest.raises(TypeError): + ar1 >>= ar2 + dpt.bitwise_right_shift(ar1, ar2, out=ar1) + + # out is second arg + ar1 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype, sycl_queue=q) + if _can_cast(ar1.dtype, ar2.dtype, _fp16, _fp64): + dpt.bitwise_right_shift(ar1, ar2, out=ar2) + assert dpt.all(ar2 == 0) + + ar3 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q)[::-1] + ar4 = dpt.ones(2 * sz, dtype=op2_dtype, sycl_queue=q)[::2] + dpt.bitwise_right_shift(ar3, ar4, out=ar4) + dpt.all(ar4 == 0) + else: + with pytest.raises(TypeError): + dpt.bitwise_right_shift(ar1, ar2, out=ar2) diff --git a/dpctl/tests/elementwise/test_bitwise_xor.py b/dpctl/tests/elementwise/test_bitwise_xor.py index b2cb11bc84..e9501b642f 100644 --- a/dpctl/tests/elementwise/test_bitwise_xor.py +++ b/dpctl/tests/elementwise/test_bitwise_xor.py @@ -18,6 +18,7 @@ import pytest import dpctl.tensor as dpt +from dpctl.tensor._type_utils import _can_cast from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported from .utils import _integral_dtypes @@ -85,3 +86,58 @@ def test_bitwise_xor_bool(): r_lo = dpt.logical_xor(x1[:, dpt.newaxis], x2[dpt.newaxis]) assert dpt.all(dpt.equal(r_bw, r_lo)) 
+ + +@pytest.mark.parametrize("dtype", ["?"] + _integral_dtypes) +def test_bitwise_xor_inplace_python_scalar(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + X = dpt.zeros((10, 10), dtype=dtype, sycl_queue=q) + dt_kind = X.dtype.kind + if dt_kind == "b": + X ^= False + else: + X ^= int(0) + + +@pytest.mark.parametrize("op1_dtype", ["?"] + _integral_dtypes) +@pytest.mark.parametrize("op2_dtype", ["?"] + _integral_dtypes) +def test_bitwise_xor_inplace_dtype_matrix(op1_dtype, op2_dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(op1_dtype, q) + skip_if_dtype_not_supported(op2_dtype, q) + + sz = 127 + ar1 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype, sycl_queue=q) + + dev = q.sycl_device + _fp16 = dev.has_aspect_fp16 + _fp64 = dev.has_aspect_fp64 + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + ar1 ^= ar2 + assert dpt.all(ar1 == 0) + + ar3 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q)[::-1] + ar4 = dpt.ones(2 * sz, dtype=op2_dtype, sycl_queue=q)[::2] + ar3 ^= ar4 + assert dpt.all(ar3 == 0) + else: + with pytest.raises(TypeError): + ar1 ^= ar2 + dpt.bitwise_xor(ar1, ar2, out=ar1) + + # out is second arg + ar1 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype, sycl_queue=q) + if _can_cast(ar1.dtype, ar2.dtype, _fp16, _fp64): + dpt.bitwise_xor(ar1, ar2, out=ar2) + assert dpt.all(ar2 == 0) + + ar3 = dpt.ones(sz, dtype=op1_dtype, sycl_queue=q)[::-1] + ar4 = dpt.ones(2 * sz, dtype=op2_dtype, sycl_queue=q)[::2] + dpt.bitwise_xor(ar3, ar4, out=ar4) + dpt.all(ar4 == 0) + else: + with pytest.raises(TypeError): + dpt.bitwise_xor(ar1, ar2, out=ar2) From 3ad6d8b4fb98f75d02447c4bd1d968f702bd1da1 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Thu, 19 Oct 2023 16:01:53 -0700 Subject: [PATCH 07/19] Added tests for in-place remainder and pow Fixed in-place remainder for devices that do not support 64-bit floating point data types --- 
.../elementwise_functions/remainder.hpp | 27 +++++---- dpctl/tests/elementwise/test_pow.py | 58 +++++++++++++++++++ dpctl/tests/elementwise/test_remainder.py | 52 +++++++++++++++++ 3 files changed, 126 insertions(+), 11 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp index 7ffb5147c2..2a821b4e85 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp @@ -316,10 +316,11 @@ template struct RemainderStridedFactory template struct RemainderInplaceFunctor { - using supports_sg_loadstore = std::negation< - std::disjunction, tu_ns::is_complex>>; - using supports_vec = std::negation< - std::disjunction, tu_ns::is_complex>>; + using supports_sg_loadstore = std::true_type; + using supports_vec = std::true_type; + + // functor is only well-defined when argT and resT are the same + static_assert(std::is_same_v); void operator()(resT &res, const argT &in) { @@ -331,7 +332,7 @@ template struct RemainderInplaceFunctor if constexpr (std::is_signed_v || std::is_signed_v) { auto tmp = res; res %= in; - if (res != 0 && l_xor(tmp < 0, in < 0)) { + if (res != resT(0) && l_xor(tmp < 0, in < 0)) { res += in; } } @@ -347,7 +348,7 @@ template struct RemainderInplaceFunctor } } else { - res = std::copysign(0, in); + res = sycl::copysign(resT(0), in); } } } @@ -384,7 +385,7 @@ template struct RemainderInplaceFunctor } } else { - res[i] = std::copysign(0, in[i]); + res[i] = sycl::copysign(resT(0), in[i]); } } } @@ -444,8 +445,10 @@ struct RemainderInplaceContigFactory { fnT get() { - if constexpr (std::is_same_v::value_type, - void>) { + if constexpr (std::is_same_v< + typename RemainderOutputType::value_type, + void>) + { fnT fn = nullptr; return fn; } @@ -484,8 +487,10 @@ struct RemainderInplaceStridedFactory { fnT get() { - if constexpr 
(std::is_same_v::value_type, - void>) { + if constexpr (std::is_same_v< + typename RemainderOutputType::value_type, + void>) + { fnT fn = nullptr; return fn; } diff --git a/dpctl/tests/elementwise/test_pow.py b/dpctl/tests/elementwise/test_pow.py index 1f13e2b533..8b76e3a9fc 100644 --- a/dpctl/tests/elementwise/test_pow.py +++ b/dpctl/tests/elementwise/test_pow.py @@ -21,6 +21,7 @@ import dpctl import dpctl.tensor as dpt +from dpctl.tensor._type_utils import _can_cast from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported from .utils import _all_dtypes, _compare_dtypes, _usm_types @@ -152,3 +153,60 @@ def test_pow_python_scalar(arr_dt): assert isinstance(R, dpt.usm_ndarray) R = dpt.pow(sc, X) assert isinstance(R, dpt.usm_ndarray) + + +@pytest.mark.parametrize("dtype", _all_dtypes[1:]) +def test_pow_inplace_python_scalar(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + X = dpt.ones((10, 10), dtype=dtype, sycl_queue=q) + dt_kind = X.dtype.kind + if dt_kind in "ui": + X **= int(1) + elif dt_kind == "f": + X **= float(1) + elif dt_kind == "c": + X **= complex(1) + + +@pytest.mark.parametrize("op1_dtype", _all_dtypes[1:]) +@pytest.mark.parametrize("op2_dtype", _all_dtypes[1:]) +def test_pow_inplace_dtype_matrix(op1_dtype, op2_dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(op1_dtype, q) + skip_if_dtype_not_supported(op2_dtype, q) + + sz = 127 + ar1 = dpt.ones(sz, dtype=op1_dtype) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype) + + dev = q.sycl_device + _fp16 = dev.has_aspect_fp16 + _fp64 = dev.has_aspect_fp64 + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + ar1 **= ar2 + assert ( + dpt.asnumpy(ar1) == np.full(ar1.shape, 1, dtype=ar1.dtype) + ).all() + + ar3 = dpt.ones(sz, dtype=op1_dtype) + ar4 = dpt.ones(2 * sz, dtype=op2_dtype) + + ar3[::-1] *= ar4[::2] + assert ( + dpt.asnumpy(ar3) == np.full(ar3.shape, 1, dtype=ar3.dtype) + ).all() + + else: + with pytest.raises(TypeError): + ar1 **= ar2 + + +def 
test_pow_inplace_basic(): + get_queue_or_skip() + + x = dpt.arange(10, dtype="i4") + expected = dpt.square(x) + x **= 2 + + assert dpt.all(x == expected) diff --git a/dpctl/tests/elementwise/test_remainder.py b/dpctl/tests/elementwise/test_remainder.py index def594f269..47500954a2 100644 --- a/dpctl/tests/elementwise/test_remainder.py +++ b/dpctl/tests/elementwise/test_remainder.py @@ -21,6 +21,7 @@ import dpctl import dpctl.tensor as dpt +from dpctl.tensor._type_utils import _can_cast from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported from .utils import _compare_dtypes, _no_complex_dtypes, _usm_types @@ -206,3 +207,54 @@ def test_remainder_python_scalar(arr_dt): assert isinstance(R, dpt.usm_ndarray) R = dpt.remainder(sc, X) assert isinstance(R, dpt.usm_ndarray) + + +@pytest.mark.parametrize("dtype", _no_complex_dtypes[1:]) +def test_remainder_inplace_python_scalar(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + X = dpt.ones((10, 10), dtype=dtype, sycl_queue=q) + dt_kind = X.dtype.kind + if dt_kind in "ui": + X %= int(1) + elif dt_kind == "f": + X %= float(1) + + +@pytest.mark.parametrize("op1_dtype", _no_complex_dtypes[1:]) +@pytest.mark.parametrize("op2_dtype", _no_complex_dtypes[1:]) +def test_remainder_inplace_dtype_matrix(op1_dtype, op2_dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(op1_dtype, q) + skip_if_dtype_not_supported(op2_dtype, q) + + sz = 127 + ar1 = dpt.ones(sz, dtype=op1_dtype) + ar2 = dpt.ones_like(ar1, dtype=op2_dtype) + + dev = q.sycl_device + _fp16 = dev.has_aspect_fp16 + _fp64 = dev.has_aspect_fp64 + if _can_cast(ar2.dtype, ar1.dtype, _fp16, _fp64): + ar1 %= ar2 + assert dpt.all(ar1 == dpt.zeros(ar1.shape, dtype=ar1.dtype)) + + ar3 = dpt.ones(sz, dtype=op1_dtype) + ar4 = dpt.ones(2 * sz, dtype=op2_dtype) + + ar3[::-1] %= ar4[::2] + assert dpt.all(ar3 == dpt.zeros(ar3.shape, dtype=ar3.dtype)) + + else: + with pytest.raises(TypeError): + ar1 %= ar2 + + +def 
test_remainder_inplace_basic(): + get_queue_or_skip() + + x = dpt.arange(10, dtype="i4") + expected = x & 1 + x %= 2 + + assert dpt.all(x == expected) From b291c971896a90559bbacc6bb482e1bdc1715f2b Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Fri, 20 Oct 2023 16:07:55 -0700 Subject: [PATCH 08/19] Test commit splitting up elementwise functions --- dpctl/tensor/CMakeLists.txt | 6 +- .../source/elementwise_functions.hpp | 56 +- .../source/elementwise_functions1.cpp | 2577 ++++++++++ .../source/elementwise_functions1.hpp | 40 + ...nctions.cpp => elementwise_functions2.cpp} | 4228 ++++------------- .../source/elementwise_functions2.hpp | 40 + dpctl/tensor/libtensor/source/tensor_py.cpp | 6 +- 7 files changed, 3553 insertions(+), 3400 deletions(-) create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions1.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions1.hpp rename dpctl/tensor/libtensor/source/{elementwise_functions.cpp => elementwise_functions2.cpp} (54%) create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions2.hpp diff --git a/dpctl/tensor/CMakeLists.txt b/dpctl/tensor/CMakeLists.txt index aba009411d..81702fa486 100644 --- a/dpctl/tensor/CMakeLists.txt +++ b/dpctl/tensor/CMakeLists.txt @@ -47,7 +47,8 @@ set(_tensor_impl_sources ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/where.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/boolean_reductions.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/device_support_queries.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions1.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/repeat.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/reduction_over_axis.cpp ) @@ -63,7 +64,8 @@ endif() set(_no_fast_math_sources ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/full_ctor.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/linear_sequences.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions1.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/reduction_over_axis.cpp ) foreach(_src_fn ${_no_fast_math_sources}) diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.hpp b/dpctl/tensor/libtensor/source/elementwise_functions.hpp index 523e4259c3..198aad61e8 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.hpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions.hpp @@ -22,7 +22,6 @@ /// This file defines functions of dpctl.tensor._tensor_impl extensions, /// specifically functions for elementwise operations. //===----------------------------------------------------------------------===// - #pragma once #include "dpctl4pybind11.hpp" @@ -38,6 +37,54 @@ #include "utils/offset_utils.hpp" #include "utils/type_dispatch.hpp" +namespace td_ns = dpctl::tensor::type_dispatch; + +static inline py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t) +{ + switch (dst_typenum_t) { + case td_ns::typenum_t::BOOL: + return py::dtype("?"); + case td_ns::typenum_t::INT8: + return py::dtype("i1"); + case td_ns::typenum_t::UINT8: + return py::dtype("u1"); + case td_ns::typenum_t::INT16: + return py::dtype("i2"); + case td_ns::typenum_t::UINT16: + return py::dtype("u2"); + case td_ns::typenum_t::INT32: + return py::dtype("i4"); + case td_ns::typenum_t::UINT32: + return py::dtype("u4"); + case td_ns::typenum_t::INT64: + return py::dtype("i8"); + case td_ns::typenum_t::UINT64: + return py::dtype("u8"); + case td_ns::typenum_t::HALF: + return py::dtype("f2"); + case td_ns::typenum_t::FLOAT: + return py::dtype("f4"); + case td_ns::typenum_t::DOUBLE: + return py::dtype("f8"); + case td_ns::typenum_t::CFLOAT: + return py::dtype("c8"); + case td_ns::typenum_t::CDOUBLE: + 
return py::dtype("c16"); + default: + throw py::value_error("Unrecognized dst_typeid"); + } +} + +static inline int _result_typeid(int arg_typeid, const int *fn_output_id) +{ + if (arg_typeid < 0 || arg_typeid >= td_ns::num_types) { + throw py::value_error("Input typeid " + std::to_string(arg_typeid) + + " is outside of expected bounds."); + } + + return fn_output_id[arg_typeid]; +} + namespace dpctl { namespace tensor @@ -45,11 +92,6 @@ namespace tensor namespace py_internal { -namespace td_ns = dpctl::tensor::type_dispatch; - -extern py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t); -extern int _result_typeid(int arg_typeid, const int *fn_output_id); - template @@ -825,8 +867,6 @@ py_binary_inplace_ufunc(const dpctl::tensor::usm_ndarray &lhs, strided_fn_ev); } -extern void init_elementwise_functions(py::module_ m); - } // namespace py_internal } // namespace tensor } // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions1.cpp b/dpctl/tensor/libtensor/source/elementwise_functions1.cpp new file mode 100644 index 0000000000..7de986856c --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions1.cpp @@ -0,0 +1,2577 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "elementwise_functions1.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/less.hpp" +#include "kernels/elementwise_functions/less_equal.hpp" +#include "kernels/elementwise_functions/log.hpp" +#include "kernels/elementwise_functions/log10.hpp" +#include "kernels/elementwise_functions/log1p.hpp" +#include "kernels/elementwise_functions/log2.hpp" +#include "kernels/elementwise_functions/logaddexp.hpp" +#include "kernels/elementwise_functions/logical_and.hpp" +#include "kernels/elementwise_functions/logical_not.hpp" +#include "kernels/elementwise_functions/logical_or.hpp" +#include "kernels/elementwise_functions/logical_xor.hpp" +#include "kernels/elementwise_functions/maximum.hpp" +#include "kernels/elementwise_functions/minimum.hpp" +#include "kernels/elementwise_functions/multiply.hpp" +#include "kernels/elementwise_functions/negative.hpp" +#include "kernels/elementwise_functions/not_equal.hpp" +#include "kernels/elementwise_functions/positive.hpp" +#include "kernels/elementwise_functions/pow.hpp" +#include "kernels/elementwise_functions/proj.hpp" +#include "kernels/elementwise_functions/real.hpp" +#include "kernels/elementwise_functions/remainder.hpp" +#include "kernels/elementwise_functions/round.hpp" +#include "kernels/elementwise_functions/rsqrt.hpp" +#include "kernels/elementwise_functions/sign.hpp" +#include "kernels/elementwise_functions/signbit.hpp" +#include "kernels/elementwise_functions/sin.hpp" +#include "kernels/elementwise_functions/sinh.hpp" +#include 
"kernels/elementwise_functions/sqrt.hpp" +#include "kernels/elementwise_functions/square.hpp" +#include "kernels/elementwise_functions/subtract.hpp" +#include "kernels/elementwise_functions/tan.hpp" +#include "kernels/elementwise_functions/tanh.hpp" +#include "kernels/elementwise_functions/trunc.hpp" + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B13: ==== LESS (x1, x2) +namespace impl +{ +namespace less_fn_ns = dpctl::tensor::kernels::less; + +static binary_contig_impl_fn_ptr_t less_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static int less_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + less_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_less_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = less_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LessTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(less_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LessStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(less_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LessContigFactory; + 
DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(less_contig_dispatch_table); +}; +} // namespace impl + +// B14: ==== LESS_EQUAL (x1, x2) +namespace impl +{ +namespace less_equal_fn_ns = dpctl::tensor::kernels::less_equal; + +static binary_contig_impl_fn_ptr_t + less_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int less_equal_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + less_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_less_equal_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = less_equal_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LessEqualTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(less_equal_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LessEqualStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(less_equal_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LessEqualContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(less_equal_contig_dispatch_table); +}; +} // namespace impl + +// U20: ==== LOG (x) +namespace impl +{ + +namespace log_fn_ns = dpctl::tensor::kernels::log; + +static unary_contig_impl_fn_ptr_t log_contig_dispatch_vector[td_ns::num_types]; +static int log_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + log_strided_dispatch_vector[td_ns::num_types]; + +void populate_log_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = log_fn_ns; + + using fn_ns::LogContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(log_contig_dispatch_vector); + + using fn_ns::LogStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(log_strided_dispatch_vector); + + using 
fn_ns::LogTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(log_output_typeid_vector); +} + +} // namespace impl + +// U21: ==== LOG1P (x) +namespace impl +{ + +namespace log1p_fn_ns = dpctl::tensor::kernels::log1p; + +static unary_contig_impl_fn_ptr_t + log1p_contig_dispatch_vector[td_ns::num_types]; +static int log1p_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + log1p_strided_dispatch_vector[td_ns::num_types]; + +void populate_log1p_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = log1p_fn_ns; + + using fn_ns::Log1pContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(log1p_contig_dispatch_vector); + + using fn_ns::Log1pStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(log1p_strided_dispatch_vector); + + using fn_ns::Log1pTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(log1p_output_typeid_vector); +} + +} // namespace impl + +// U22: ==== LOG2 (x) +namespace impl +{ + +namespace log2_fn_ns = dpctl::tensor::kernels::log2; + +static unary_contig_impl_fn_ptr_t log2_contig_dispatch_vector[td_ns::num_types]; +static int log2_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + log2_strided_dispatch_vector[td_ns::num_types]; + +void populate_log2_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = log2_fn_ns; + + using fn_ns::Log2ContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(log2_contig_dispatch_vector); + + using fn_ns::Log2StridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(log2_strided_dispatch_vector); + + using fn_ns::Log2TypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(log2_output_typeid_vector); +}; + +} // namespace impl + +// U23: ==== LOG10 (x) +namespace impl +{ + +namespace log10_fn_ns = dpctl::tensor::kernels::log10; + +static unary_contig_impl_fn_ptr_t + 
log10_contig_dispatch_vector[td_ns::num_types]; +static int log10_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + log10_strided_dispatch_vector[td_ns::num_types]; + +void populate_log10_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = log10_fn_ns; + + using fn_ns::Log10ContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(log10_contig_dispatch_vector); + + using fn_ns::Log10StridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(log10_strided_dispatch_vector); + + using fn_ns::Log10TypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(log10_output_typeid_vector); +}; + +} // namespace impl + +// B15: ==== LOGADDEXP (x1, x2) +namespace impl +{ +namespace logaddexp_fn_ns = dpctl::tensor::kernels::logaddexp; + +static binary_contig_impl_fn_ptr_t + logaddexp_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int logaddexp_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + logaddexp_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_logaddexp_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = logaddexp_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LogAddExpTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(logaddexp_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LogAddExpStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(logaddexp_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LogAddExpContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(logaddexp_contig_dispatch_table); +}; +} // namespace impl + +// B16: ==== LOGICAL_AND (x1, x2) +namespace impl +{ +namespace logical_and_fn_ns = 
dpctl::tensor::kernels::logical_and; + +static binary_contig_impl_fn_ptr_t + logical_and_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int logical_and_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + logical_and_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_logical_and_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = logical_and_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LogicalAndTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(logical_and_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LogicalAndStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(logical_and_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LogicalAndContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(logical_and_contig_dispatch_table); +}; +} // namespace impl + +// U24: ==== LOGICAL_NOT (x) +namespace impl +{ +namespace logical_not_fn_ns = dpctl::tensor::kernels::logical_not; + +static unary_contig_impl_fn_ptr_t + logical_not_contig_dispatch_vector[td_ns::num_types]; +static int logical_not_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + logical_not_strided_dispatch_vector[td_ns::num_types]; + +void populate_logical_not_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = logical_not_fn_ns; + + using fn_ns::LogicalNotContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(logical_not_contig_dispatch_vector); + + using fn_ns::LogicalNotStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(logical_not_strided_dispatch_vector); + + using fn_ns::LogicalNotTypeMapFactory; + DispatchVectorBuilder dvb3; + 
dvb3.populate_dispatch_vector(logical_not_output_typeid_vector); +}; +} // namespace impl + +// B17: ==== LOGICAL_OR (x1, x2) +namespace impl +{ +namespace logical_or_fn_ns = dpctl::tensor::kernels::logical_or; + +static binary_contig_impl_fn_ptr_t + logical_or_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int logical_or_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + logical_or_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_logical_or_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = logical_or_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LogicalOrTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(logical_or_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LogicalOrStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(logical_or_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LogicalOrContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(logical_or_contig_dispatch_table); +}; +} // namespace impl + +// B18: ==== LOGICAL_XOR (x1, x2) +namespace impl +{ +namespace logical_xor_fn_ns = dpctl::tensor::kernels::logical_xor; + +static binary_contig_impl_fn_ptr_t + logical_xor_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int logical_xor_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + logical_xor_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_logical_xor_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = logical_xor_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LogicalXorTypeMapFactory; + DispatchTableBuilder dtb1; + 
dtb1.populate_dispatch_table(logical_xor_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LogicalXorStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(logical_xor_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LogicalXorContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(logical_xor_contig_dispatch_table); +}; +} // namespace impl + +// B??: ==== MAXIMUM (x1, x2) +namespace impl +{ + +namespace maximum_fn_ns = dpctl::tensor::kernels::maximum; + +static binary_contig_impl_fn_ptr_t + maximum_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int maximum_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + maximum_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_maximum_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = maximum_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::MaximumTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(maximum_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::MaximumStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(maximum_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::MaximumContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(maximum_contig_dispatch_table); +}; + +} // namespace impl + +// B??: ==== MINIMUM (x1, x2) +namespace impl +{ + +namespace minimum_fn_ns = dpctl::tensor::kernels::minimum; + +static binary_contig_impl_fn_ptr_t + minimum_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int minimum_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + 
minimum_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_minimum_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = minimum_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::MinimumTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(minimum_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::MinimumStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(minimum_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::MinimumContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(minimum_contig_dispatch_table); +}; + +} // namespace impl + +// B19: ==== MULTIPLY (x1, x2) +namespace impl +{ + +namespace multiply_fn_ns = dpctl::tensor::kernels::multiply; + +static binary_contig_impl_fn_ptr_t + multiply_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int multiply_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + multiply_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +// mul(matrix, row) +static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t + multiply_contig_matrix_contig_row_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +// mul(row, matrix) +static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t + multiply_contig_row_contig_matrix_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + multiply_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + multiply_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t + multiply_inplace_row_matrix_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void 
populate_multiply_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = multiply_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::MultiplyTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(multiply_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::MultiplyStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(multiply_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::MultiplyContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(multiply_contig_dispatch_table); + + // function pointers for operation on contiguous matrix, contiguous row + // with contiguous matrix output + using fn_ns::MultiplyContigMatrixContigRowBroadcastFactory; + DispatchTableBuilder< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, + MultiplyContigMatrixContigRowBroadcastFactory, num_types> + dtb4; + dtb4.populate_dispatch_table( + multiply_contig_matrix_contig_row_broadcast_dispatch_table); + + // function pointers for operation on contiguous row, contiguous matrix + // with contiguous matrix output + using fn_ns::MultiplyContigRowContigMatrixBroadcastFactory; + DispatchTableBuilder< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, + MultiplyContigRowContigMatrixBroadcastFactory, num_types> + dtb5; + dtb5.populate_dispatch_table( + multiply_contig_row_contig_matrix_broadcast_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::MultiplyInplaceStridedFactory; + DispatchTableBuilder + dtb6; + dtb6.populate_dispatch_table(multiply_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::MultiplyInplaceContigFactory; + DispatchTableBuilder + dtb7; + dtb7.populate_dispatch_table(multiply_inplace_contig_dispatch_table); + 
+ // function pointers for inplace operation on contiguous matrix + // and contiguous row + using fn_ns::MultiplyInplaceRowMatrixBroadcastFactory; + DispatchTableBuilder + dtb8; + dtb8.populate_dispatch_table(multiply_inplace_row_matrix_dispatch_table); +}; + +} // namespace impl + +// U25: ==== NEGATIVE (x) +namespace impl +{ + +namespace negative_fn_ns = dpctl::tensor::kernels::negative; + +static unary_contig_impl_fn_ptr_t + negative_contig_dispatch_vector[td_ns::num_types]; +static int negative_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + negative_strided_dispatch_vector[td_ns::num_types]; + +void populate_negative_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = negative_fn_ns; + + using fn_ns::NegativeContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(negative_contig_dispatch_vector); + + using fn_ns::NegativeStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(negative_strided_dispatch_vector); + + using fn_ns::NegativeTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(negative_output_typeid_vector); +} + +} // namespace impl + +// B20: ==== NOT_EQUAL (x1, x2) +namespace impl +{ +namespace not_equal_fn_ns = dpctl::tensor::kernels::not_equal; + +static binary_contig_impl_fn_ptr_t + not_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int not_equal_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + not_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_not_equal_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = not_equal_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::NotEqualTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(not_equal_output_id_table); + + // function pointers for operation on general strided arrays + using 
fn_ns::NotEqualStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(not_equal_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::NotEqualContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(not_equal_contig_dispatch_table); +}; +} // namespace impl + +// U26: ==== POSITIVE (x) +namespace impl +{ + +namespace positive_fn_ns = dpctl::tensor::kernels::positive; + +static unary_contig_impl_fn_ptr_t + positive_contig_dispatch_vector[td_ns::num_types]; +static int positive_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + positive_strided_dispatch_vector[td_ns::num_types]; + +void populate_positive_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = positive_fn_ns; + + using fn_ns::PositiveContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(positive_contig_dispatch_vector); + + using fn_ns::PositiveStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(positive_strided_dispatch_vector); + + using fn_ns::PositiveTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(positive_output_typeid_vector); +} + +} // namespace impl + +// B21: ==== POW (x1, x2) +namespace impl +{ + +namespace pow_fn_ns = dpctl::tensor::kernels::pow; + +static binary_contig_impl_fn_ptr_t pow_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static int pow_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + pow_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + pow_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + pow_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_pow_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = pow_fn_ns; + + // which 
input types are supported, and what is the type of the result + using fn_ns::PowTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(pow_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::PowStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(pow_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::PowContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(pow_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::PowInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table(pow_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::PowInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(pow_inplace_contig_dispatch_table); +}; + +} // namespace impl + +// U??: ==== PROJ (x) +namespace impl +{ + +namespace proj_fn_ns = dpctl::tensor::kernels::proj; + +static unary_contig_impl_fn_ptr_t proj_contig_dispatch_vector[td_ns::num_types]; +static int proj_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + proj_strided_dispatch_vector[td_ns::num_types]; + +void populate_proj_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = proj_fn_ns; + + using fn_ns::ProjContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(proj_contig_dispatch_vector); + + using fn_ns::ProjStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(proj_strided_dispatch_vector); + + using fn_ns::ProjTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(proj_output_typeid_vector); +} +} // namespace impl + +// U27: ==== REAL (x) +namespace impl +{ + +namespace real_fn_ns = dpctl::tensor::kernels::real; + +static 
unary_contig_impl_fn_ptr_t real_contig_dispatch_vector[td_ns::num_types]; +static int real_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + real_strided_dispatch_vector[td_ns::num_types]; + +void populate_real_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = real_fn_ns; + + using fn_ns::RealContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(real_contig_dispatch_vector); + + using fn_ns::RealStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(real_strided_dispatch_vector); + + using fn_ns::RealTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(real_output_typeid_vector); +} +} // namespace impl + +// B22: ==== REMAINDER (x1, x2) +namespace impl +{ + +namespace remainder_fn_ns = dpctl::tensor::kernels::remainder; + +static binary_contig_impl_fn_ptr_t + remainder_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int remainder_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + remainder_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + remainder_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + remainder_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_remainder_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = remainder_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::RemainderTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(remainder_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::RemainderStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(remainder_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + 
using fn_ns::RemainderContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(remainder_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::RemainderInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table(remainder_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::RemainderInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(remainder_inplace_contig_dispatch_table); +} + +} // namespace impl + +// U28: ==== ROUND (x) +namespace impl +{ + +namespace round_fn_ns = dpctl::tensor::kernels::round; + +static unary_contig_impl_fn_ptr_t + round_contig_dispatch_vector[td_ns::num_types]; +static int round_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + round_strided_dispatch_vector[td_ns::num_types]; + +void populate_round_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = round_fn_ns; + + using fn_ns::RoundContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(round_contig_dispatch_vector); + + using fn_ns::RoundStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(round_strided_dispatch_vector); + + using fn_ns::RoundTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(round_output_typeid_vector); +} + +} // namespace impl + +// U29: ==== SIGN (x) +namespace impl +{ + +namespace sign_fn_ns = dpctl::tensor::kernels::sign; + +static unary_contig_impl_fn_ptr_t sign_contig_dispatch_vector[td_ns::num_types]; +static int sign_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + sign_strided_dispatch_vector[td_ns::num_types]; + +void populate_sign_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = sign_fn_ns; + + using fn_ns::SignContigFactory; + DispatchVectorBuilder + dvb1; + 
dvb1.populate_dispatch_vector(sign_contig_dispatch_vector); + + using fn_ns::SignStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(sign_strided_dispatch_vector); + + using fn_ns::SignTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(sign_output_typeid_vector); +} + +} // namespace impl + +// ==== SIGNBIT (x) +namespace impl +{ + +namespace signbit_fn_ns = dpctl::tensor::kernels::signbit; + +static unary_contig_impl_fn_ptr_t + signbit_contig_dispatch_vector[td_ns::num_types]; +static int signbit_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + signbit_strided_dispatch_vector[td_ns::num_types]; + +void populate_signbit_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = signbit_fn_ns; + + using fn_ns::SignbitContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(signbit_contig_dispatch_vector); + + using fn_ns::SignbitStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(signbit_strided_dispatch_vector); + + using fn_ns::SignbitTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(signbit_output_typeid_vector); +} + +} // namespace impl + +// U30: ==== SIN (x) +namespace impl +{ + +namespace sin_fn_ns = dpctl::tensor::kernels::sin; + +static unary_contig_impl_fn_ptr_t sin_contig_dispatch_vector[td_ns::num_types]; +static int sin_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + sin_strided_dispatch_vector[td_ns::num_types]; + +void populate_sin_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = sin_fn_ns; + + using fn_ns::SinContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(sin_contig_dispatch_vector); + + using fn_ns::SinStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(sin_strided_dispatch_vector); + + using fn_ns::SinTypeMapFactory; + DispatchVectorBuilder dvb3; + 
dvb3.populate_dispatch_vector(sin_output_typeid_vector); +} + +} // namespace impl + +// U31: ==== SINH (x) +namespace impl +{ + +namespace sinh_fn_ns = dpctl::tensor::kernels::sinh; + +static unary_contig_impl_fn_ptr_t sinh_contig_dispatch_vector[td_ns::num_types]; +static int sinh_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + sinh_strided_dispatch_vector[td_ns::num_types]; + +void populate_sinh_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = sinh_fn_ns; + + using fn_ns::SinhContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(sinh_contig_dispatch_vector); + + using fn_ns::SinhStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(sinh_strided_dispatch_vector); + + using fn_ns::SinhTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(sinh_output_typeid_vector); +} + +} // namespace impl + +// U32: ==== SQUARE (x) +namespace impl +{ + +namespace square_fn_ns = dpctl::tensor::kernels::square; + +static unary_contig_impl_fn_ptr_t + square_contig_dispatch_vector[td_ns::num_types]; +static int square_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + square_strided_dispatch_vector[td_ns::num_types]; + +void populate_square_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = square_fn_ns; + + using fn_ns::SquareContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(square_contig_dispatch_vector); + + using fn_ns::SquareStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(square_strided_dispatch_vector); + + using fn_ns::SquareTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(square_output_typeid_vector); +} + +} // namespace impl + +// U33: ==== SQRT (x) +namespace impl +{ + +namespace sqrt_fn_ns = dpctl::tensor::kernels::sqrt; + +static unary_contig_impl_fn_ptr_t sqrt_contig_dispatch_vector[td_ns::num_types]; +static 
int sqrt_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + sqrt_strided_dispatch_vector[td_ns::num_types]; + +void populate_sqrt_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = sqrt_fn_ns; + + using fn_ns::SqrtContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(sqrt_contig_dispatch_vector); + + using fn_ns::SqrtStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(sqrt_strided_dispatch_vector); + + using fn_ns::SqrtTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(sqrt_output_typeid_vector); +} + +} // namespace impl + +// B23: ==== SUBTRACT (x1, x2) +namespace impl +{ +namespace subtract_fn_ns = dpctl::tensor::kernels::subtract; + +static binary_contig_impl_fn_ptr_t + subtract_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int subtract_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + subtract_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +// sub(matrix, row) +static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t + subtract_contig_matrix_contig_row_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +// sub(row, matrix) +static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t + subtract_contig_row_contig_matrix_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + subtract_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + subtract_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t + subtract_inplace_row_matrix_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_subtract_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = subtract_fn_ns; + + // which input types are supported, and what is the 
type of the result + using fn_ns::SubtractTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(subtract_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::SubtractStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(subtract_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::SubtractContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(subtract_contig_dispatch_table); + + // function pointers for operation on contiguous matrix, contiguous row + // with contiguous matrix output + using fn_ns::SubtractContigMatrixContigRowBroadcastFactory; + DispatchTableBuilder< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, + SubtractContigMatrixContigRowBroadcastFactory, num_types> + dtb4; + dtb4.populate_dispatch_table( + subtract_contig_matrix_contig_row_broadcast_dispatch_table); + + // function pointers for operation on contiguous row, contiguous matrix + // with contiguous matrix output + using fn_ns::SubtractContigRowContigMatrixBroadcastFactory; + DispatchTableBuilder< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, + SubtractContigRowContigMatrixBroadcastFactory, num_types> + dtb5; + dtb5.populate_dispatch_table( + subtract_contig_row_contig_matrix_broadcast_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::SubtractInplaceStridedFactory; + DispatchTableBuilder + dtb6; + dtb6.populate_dispatch_table(subtract_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::SubtractInplaceContigFactory; + DispatchTableBuilder + dtb7; + dtb7.populate_dispatch_table(subtract_inplace_contig_dispatch_table); + + // function pointers for inplace operation on contiguous matrix + // and contiguous row + using fn_ns::SubtractInplaceRowMatrixBroadcastFactory; + 
DispatchTableBuilder + dtb8; + dtb8.populate_dispatch_table(subtract_inplace_row_matrix_dispatch_table); +}; + +} // namespace impl + +// U34: ==== TAN (x) +namespace impl +{ + +namespace tan_fn_ns = dpctl::tensor::kernels::tan; + +static unary_contig_impl_fn_ptr_t tan_contig_dispatch_vector[td_ns::num_types]; +static int tan_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + tan_strided_dispatch_vector[td_ns::num_types]; + +void populate_tan_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = tan_fn_ns; + + using fn_ns::TanContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(tan_contig_dispatch_vector); + + using fn_ns::TanStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(tan_strided_dispatch_vector); + + using fn_ns::TanTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(tan_output_typeid_vector); +} + +} // namespace impl + +// U35: ==== TANH (x) +namespace impl +{ + +namespace tanh_fn_ns = dpctl::tensor::kernels::tanh; + +static unary_contig_impl_fn_ptr_t tanh_contig_dispatch_vector[td_ns::num_types]; +static int tanh_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + tanh_strided_dispatch_vector[td_ns::num_types]; + +void populate_tanh_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = tanh_fn_ns; + + using fn_ns::TanhContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(tanh_contig_dispatch_vector); + + using fn_ns::TanhStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(tanh_strided_dispatch_vector); + + using fn_ns::TanhTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(tanh_output_typeid_vector); +} + +} // namespace impl + +// U36: ==== TRUNC (x) +namespace impl +{ + +namespace trunc_fn_ns = dpctl::tensor::kernels::trunc; + +static unary_contig_impl_fn_ptr_t + 
trunc_contig_dispatch_vector[td_ns::num_types]; +static int trunc_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + trunc_strided_dispatch_vector[td_ns::num_types]; + +void populate_trunc_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = trunc_fn_ns; + + using fn_ns::TruncContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(trunc_contig_dispatch_vector); + + using fn_ns::TruncStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(trunc_strided_dispatch_vector); + + using fn_ns::TruncTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(trunc_output_typeid_vector); +} + +} // namespace impl + +// U39: ==== RSQRT (x) +namespace impl +{ + +namespace rsqrt_fn_ns = dpctl::tensor::kernels::rsqrt; + +static unary_contig_impl_fn_ptr_t + rsqrt_contig_dispatch_vector[td_ns::num_types]; +static int rsqrt_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + rsqrt_strided_dispatch_vector[td_ns::num_types]; + +void populate_rsqrt_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = rsqrt_fn_ns; + + using fn_ns::RsqrtContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(rsqrt_contig_dispatch_vector); + + using fn_ns::RsqrtStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(rsqrt_strided_dispatch_vector); + + using fn_ns::RsqrtTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(rsqrt_output_typeid_vector); +} + +} // namespace impl + +// ========================================================================================== +// // + +namespace py = pybind11; + +void init_elementwise_functions1(py::module_ m) +{ + + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + // B13: ==== LESS (x1, x2) + { + impl::populate_less_dispatch_tables(); + using impl::less_contig_dispatch_table; + using 
impl::less_output_id_table; + using impl::less_strided_dispatch_table; + + auto less_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, less_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + less_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + less_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto less_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + less_output_id_table); + }; + m.def("_less", less_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_less_result_type", less_result_type_pyapi, ""); + } + + // B14: ==== LESS_EQUAL (x1, x2) + { + impl::populate_less_equal_dispatch_tables(); + using impl::less_equal_contig_dispatch_table; + using impl::less_equal_output_id_table; + using impl::less_equal_strided_dispatch_table; + + auto less_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, less_equal_output_id_table, + // function pointers to handle operation on contiguous arrays + 
// (pointers may be nullptr) + less_equal_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + less_equal_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto less_equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + less_equal_output_id_table); + }; + m.def("_less_equal", less_equal_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_less_equal_result_type", less_equal_result_type_pyapi, ""); + } + + // U20: ==== LOG (x) + { + impl::populate_log_dispatch_vectors(); + using impl::log_contig_dispatch_vector; + using impl::log_output_typeid_vector; + using impl::log_strided_dispatch_vector; + + auto log_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, log_output_typeid_vector, + log_contig_dispatch_vector, log_strided_dispatch_vector); + }; + m.def("_log", log_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto log_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, log_output_typeid_vector); + }; + m.def("_log_result_type", log_result_type_pyapi); + } + + // U21: ==== LOG1P (x) + { + impl::populate_log1p_dispatch_vectors(); + using impl::log1p_contig_dispatch_vector; + using impl::log1p_output_typeid_vector; + using impl::log1p_strided_dispatch_vector; 
+ + auto log1p_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, log1p_output_typeid_vector, + log1p_contig_dispatch_vector, log1p_strided_dispatch_vector); + }; + m.def("_log1p", log1p_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto log1p_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + log1p_output_typeid_vector); + }; + m.def("_log1p_result_type", log1p_result_type_pyapi); + } + + // U22: ==== LOG2 (x) + { + impl::populate_log2_dispatch_vectors(); + + using impl::log2_contig_dispatch_vector; + using impl::log2_output_typeid_vector; + using impl::log2_strided_dispatch_vector; + auto log2_pyapi = [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, log2_output_typeid_vector, + log2_contig_dispatch_vector, log2_strided_dispatch_vector); + }; + auto log2_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, log2_output_typeid_vector); + }; + m.def("_log2", log2_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_log2_result_type", log2_result_type_pyapi, ""); + } + + // U23: ==== LOG10 (x) + { + impl::populate_log10_dispatch_vectors(); + + using impl::log10_contig_dispatch_vector; + using impl::log10_output_typeid_vector; + using impl::log10_strided_dispatch_vector; + auto log10_pyapi = [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, log10_output_typeid_vector, + log10_contig_dispatch_vector, log10_strided_dispatch_vector); + }; + auto log10_result_type_pyapi = 
[&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + log10_output_typeid_vector); + }; + m.def("_log10", log10_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_log10_result_type", log10_result_type_pyapi, ""); + } + + // B15: ==== LOGADDEXP (x1, x2) + { + impl::populate_logaddexp_dispatch_tables(); + using impl::logaddexp_contig_dispatch_table; + using impl::logaddexp_output_id_table; + using impl::logaddexp_strided_dispatch_table; + + auto logaddexp_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, logaddexp_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + logaddexp_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + logaddexp_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto logaddexp_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + logaddexp_output_id_table); + }; + m.def("_logaddexp", logaddexp_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_logaddexp_result_type", logaddexp_result_type_pyapi, ""); + } + + // B16: ==== LOGICAL_AND (x1, x2) + { + impl::populate_logical_and_dispatch_tables(); + using 
impl::logical_and_contig_dispatch_table; + using impl::logical_and_output_id_table; + using impl::logical_and_strided_dispatch_table; + + auto logical_and_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, logical_and_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + logical_and_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + logical_and_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto logical_and_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + logical_and_output_id_table); + }; + m.def("_logical_and", logical_and_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_logical_and_result_type", logical_and_result_type_pyapi, ""); + } + + // U24: ==== LOGICAL_NOT (x) + { + impl::populate_logical_not_dispatch_vectors(); + using impl::logical_not_contig_dispatch_vector; + using impl::logical_not_output_typeid_vector; + using impl::logical_not_strided_dispatch_vector; + + auto logical_not_pyapi = [&](const arrayT &src, arrayT dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + logical_not_output_typeid_vector, + 
logical_not_contig_dispatch_vector, + logical_not_strided_dispatch_vector); + }; + m.def("_logical_not", logical_not_pyapi, "", py::arg("src"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + + auto logical_not_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + logical_not_output_typeid_vector); + }; + m.def("_logical_not_result_type", logical_not_result_type_pyapi); + } + + // B17: ==== LOGICAL_OR (x1, x2) + { + impl::populate_logical_or_dispatch_tables(); + using impl::logical_or_contig_dispatch_table; + using impl::logical_or_output_id_table; + using impl::logical_or_strided_dispatch_table; + + auto logical_or_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, logical_or_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + logical_or_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + logical_or_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto logical_or_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + logical_or_output_id_table); + }; + m.def("_logical_or", logical_or_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + 
m.def("_logical_or_result_type", logical_or_result_type_pyapi, ""); + } + + // B18: ==== LOGICAL_XOR (x1, x2) + { + impl::populate_logical_xor_dispatch_tables(); + using impl::logical_xor_contig_dispatch_table; + using impl::logical_xor_output_id_table; + using impl::logical_xor_strided_dispatch_table; + + auto logical_xor_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, logical_xor_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + logical_xor_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + logical_xor_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto logical_xor_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + logical_xor_output_id_table); + }; + m.def("_logical_xor", logical_xor_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_logical_xor_result_type", logical_xor_result_type_pyapi, ""); + } + + // B??: ==== MAXIMUM (x1, x2) + { + impl::populate_maximum_dispatch_tables(); + using impl::maximum_contig_dispatch_table; + using impl::maximum_output_id_table; + using impl::maximum_strided_dispatch_table; + + auto maximum_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const 
dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, maximum_output_id_table, + // function pointers to handle operation on contiguous + // arrays (pointers may be nullptr) + maximum_contig_dispatch_table, + // function pointers to handle operation on strided arrays + // (most general case) + maximum_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto maximum_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + maximum_output_id_table); + }; + m.def("_maximum", maximum_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_maximum_result_type", maximum_result_type_pyapi, ""); + } + + // B??: ==== MINIMUM (x1, x2) + { + impl::populate_minimum_dispatch_tables(); + using impl::minimum_contig_dispatch_table; + using impl::minimum_output_id_table; + using impl::minimum_strided_dispatch_table; + + auto minimum_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, minimum_output_id_table, + // function pointers to handle operation on contiguous + // arrays (pointers may be nullptr) + minimum_contig_dispatch_table, + // function pointers to handle operation on strided arrays + // (most general 
case) + minimum_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto minimum_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + minimum_output_id_table); + }; + m.def("_minimum", minimum_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_minimum_result_type", minimum_result_type_pyapi, ""); + } + + // B19: ==== MULTIPLY (x1, x2) + { + impl::populate_multiply_dispatch_tables(); + using impl::multiply_contig_dispatch_table; + using impl::multiply_contig_matrix_contig_row_broadcast_dispatch_table; + using impl::multiply_contig_row_contig_matrix_broadcast_dispatch_table; + using impl::multiply_output_id_table; + using impl::multiply_strided_dispatch_table; + + auto multiply_pyapi = + [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, multiply_output_id_table, + // function pointers to handle operation on contiguous + // arrays (pointers may be nullptr) + multiply_contig_dispatch_table, + // function pointers to handle operation on strided arrays + // (most general case) + multiply_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + multiply_contig_matrix_contig_row_broadcast_dispatch_table, + // function pointers to handle operation of 
c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + multiply_contig_row_contig_matrix_broadcast_dispatch_table); + }; + auto multiply_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + multiply_output_id_table); + }; + m.def("_multiply", multiply_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_multiply_result_type", multiply_result_type_pyapi, ""); + + using impl::multiply_inplace_contig_dispatch_table; + using impl::multiply_inplace_row_matrix_dispatch_table; + using impl::multiply_inplace_strided_dispatch_table; + + auto multiply_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, multiply_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + multiply_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + multiply_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + multiply_inplace_row_matrix_dispatch_table); + }; + m.def("_multiply_inplace", multiply_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } + + // U25: ==== NEGATIVE (x) + { + impl::populate_negative_dispatch_vectors(); + using impl::negative_contig_dispatch_vector; + using impl::negative_output_typeid_vector; + using impl::negative_strided_dispatch_vector; + + auto negative_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + 
negative_output_typeid_vector, + negative_contig_dispatch_vector, + negative_strided_dispatch_vector); + }; + m.def("_negative", negative_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto negative_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + negative_output_typeid_vector); + }; + m.def("_negative_result_type", negative_result_type_pyapi); + } + + // B20: ==== NOT_EQUAL (x1, x2) + { + impl::populate_not_equal_dispatch_tables(); + using impl::not_equal_contig_dispatch_table; + using impl::not_equal_output_id_table; + using impl::not_equal_strided_dispatch_table; + + auto not_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, not_equal_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + not_equal_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + not_equal_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto not_equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + not_equal_output_id_table); + }; + m.def("_not_equal", not_equal_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + 
m.def("_not_equal_result_type", not_equal_result_type_pyapi, ""); + } + + // U26: ==== POSITIVE (x) + { + impl::populate_positive_dispatch_vectors(); + using impl::positive_contig_dispatch_vector; + using impl::positive_output_typeid_vector; + using impl::positive_strided_dispatch_vector; + + auto positive_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + positive_output_typeid_vector, + positive_contig_dispatch_vector, + positive_strided_dispatch_vector); + }; + m.def("_positive", positive_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto positive_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + positive_output_typeid_vector); + }; + m.def("_positive_result_type", positive_result_type_pyapi); + } + + // B21: ==== POW (x1, x2) + { + impl::populate_pow_dispatch_tables(); + using impl::pow_contig_dispatch_table; + using impl::pow_output_id_table; + using impl::pow_strided_dispatch_table; + + auto pow_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, pow_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + pow_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + pow_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + 
binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto pow_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + pow_output_id_table); + }; + m.def("_pow", pow_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_pow_result_type", pow_result_type_pyapi, ""); + + using impl::pow_inplace_contig_dispatch_table; + using impl::pow_inplace_strided_dispatch_table; + + auto pow_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, pow_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + pow_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + pow_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_pow_inplace", pow_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } + + // U??: ==== PROJ (x) + { + impl::populate_proj_dispatch_vectors(); + using impl::proj_contig_dispatch_vector; + using impl::proj_output_typeid_vector; + using impl::proj_strided_dispatch_vector; + + auto proj_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, proj_output_typeid_vector, + proj_contig_dispatch_vector, proj_strided_dispatch_vector); + }; + m.def("_proj", proj_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), 
py::arg("depends") = py::list()); + + auto proj_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, proj_output_typeid_vector); + }; + m.def("_proj_result_type", proj_result_type_pyapi); + } + + // U27: ==== REAL (x) + { + impl::populate_real_dispatch_vectors(); + using impl::real_contig_dispatch_vector; + using impl::real_output_typeid_vector; + using impl::real_strided_dispatch_vector; + + auto real_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, real_output_typeid_vector, + real_contig_dispatch_vector, real_strided_dispatch_vector); + }; + m.def("_real", real_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto real_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, real_output_typeid_vector); + }; + m.def("_real_result_type", real_result_type_pyapi); + } + + // B22: ==== REMAINDER (x1, x2) + { + impl::populate_remainder_dispatch_tables(); + using impl::remainder_contig_dispatch_table; + using impl::remainder_output_id_table; + using impl::remainder_strided_dispatch_table; + + auto remainder_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, remainder_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + remainder_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + remainder_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + 
binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto remainder_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + remainder_output_id_table); + }; + m.def("_remainder", remainder_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_remainder_result_type", remainder_result_type_pyapi, ""); + + using impl::remainder_inplace_contig_dispatch_table; + using impl::remainder_inplace_strided_dispatch_table; + + auto remainder_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, remainder_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + remainder_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + remainder_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_remainder_inplace", remainder_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } + + // U28: ==== ROUND (x) + { + impl::populate_round_dispatch_vectors(); + using impl::round_contig_dispatch_vector; + using impl::round_output_typeid_vector; + using impl::round_strided_dispatch_vector; + + auto round_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue 
&exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, round_output_typeid_vector, + round_contig_dispatch_vector, round_strided_dispatch_vector); + }; + m.def("_round", round_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto round_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + round_output_typeid_vector); + }; + m.def("_round_result_type", round_result_type_pyapi); + } + + // U29: ==== SIGN (x) + { + impl::populate_sign_dispatch_vectors(); + using impl::sign_contig_dispatch_vector; + using impl::sign_output_typeid_vector; + using impl::sign_strided_dispatch_vector; + + auto sign_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, sign_output_typeid_vector, + sign_contig_dispatch_vector, sign_strided_dispatch_vector); + }; + m.def("_sign", sign_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto sign_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, sign_output_typeid_vector); + }; + m.def("_sign_result_type", sign_result_type_pyapi); + } + + // ==== SIGNBIT (x) + { + impl::populate_signbit_dispatch_vectors(); + using impl::signbit_contig_dispatch_vector; + using impl::signbit_output_typeid_vector; + using impl::signbit_strided_dispatch_vector; + + auto signbit_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + signbit_output_typeid_vector, + signbit_contig_dispatch_vector, + signbit_strided_dispatch_vector); + }; + m.def("_signbit", signbit_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto signbit_result_type_pyapi = [&](const py::dtype 
&dtype) { + return py_unary_ufunc_result_type(dtype, + signbit_output_typeid_vector); + }; + m.def("_signbit_result_type", signbit_result_type_pyapi); + } + + // U30: ==== SIN (x) + { + impl::populate_sin_dispatch_vectors(); + using impl::sin_contig_dispatch_vector; + using impl::sin_output_typeid_vector; + using impl::sin_strided_dispatch_vector; + + auto sin_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, sin_output_typeid_vector, + sin_contig_dispatch_vector, sin_strided_dispatch_vector); + }; + m.def("_sin", sin_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto sin_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, sin_output_typeid_vector); + }; + m.def("_sin_result_type", sin_result_type_pyapi); + } + // U31: ==== SINH (x) + { + impl::populate_sinh_dispatch_vectors(); + using impl::sinh_contig_dispatch_vector; + using impl::sinh_output_typeid_vector; + using impl::sinh_strided_dispatch_vector; + + auto sinh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, sinh_output_typeid_vector, + sinh_contig_dispatch_vector, sinh_strided_dispatch_vector); + }; + m.def("_sinh", sinh_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto sinh_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, sinh_output_typeid_vector); + }; + m.def("_sinh_result_type", sinh_result_type_pyapi); + } + + // U32: ==== SQUARE (x) + { + impl::populate_square_dispatch_vectors(); + using impl::square_contig_dispatch_vector; + using impl::square_output_typeid_vector; + using impl::square_strided_dispatch_vector; + + auto square_pyapi = [&](const arrayT &src, const arrayT &dst, 
+ sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, square_output_typeid_vector, + square_contig_dispatch_vector, square_strided_dispatch_vector); + }; + m.def("_square", square_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto square_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + square_output_typeid_vector); + }; + m.def("_square_result_type", square_result_type_pyapi); + } + + // U33: ==== SQRT (x) + { + impl::populate_sqrt_dispatch_vectors(); + using impl::sqrt_contig_dispatch_vector; + using impl::sqrt_output_typeid_vector; + using impl::sqrt_strided_dispatch_vector; + + auto sqrt_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, sqrt_output_typeid_vector, + sqrt_contig_dispatch_vector, sqrt_strided_dispatch_vector); + }; + m.def("_sqrt", sqrt_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto sqrt_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, sqrt_output_typeid_vector); + }; + m.def("_sqrt_result_type", sqrt_result_type_pyapi); + } + + // B23: ==== SUBTRACT (x1, x2) + { + impl::populate_subtract_dispatch_tables(); + using impl::subtract_contig_dispatch_table; + using impl::subtract_contig_matrix_contig_row_broadcast_dispatch_table; + using impl::subtract_contig_row_contig_matrix_broadcast_dispatch_table; + using impl::subtract_output_id_table; + using impl::subtract_strided_dispatch_table; + + auto subtract_pyapi = + [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, 
subtract_output_id_table, + // function pointers to handle operation on contiguous + // arrays (pointers may be nullptr) + subtract_contig_dispatch_table, + // function pointers to handle operation on strided arrays + // (most general case) + subtract_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + subtract_contig_matrix_contig_row_broadcast_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + subtract_contig_row_contig_matrix_broadcast_dispatch_table); + }; + auto subtract_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + subtract_output_id_table); + }; + m.def("_subtract", subtract_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_subtract_result_type", subtract_result_type_pyapi, ""); + + using impl::subtract_inplace_contig_dispatch_table; + using impl::subtract_inplace_row_matrix_dispatch_table; + using impl::subtract_inplace_strided_dispatch_table; + + auto subtract_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, subtract_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + subtract_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + subtract_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + subtract_inplace_row_matrix_dispatch_table); + }; + m.def("_subtract_inplace", subtract_inplace_pyapi, 
"", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } + + // U34: ==== TAN (x) + { + impl::populate_tan_dispatch_vectors(); + using impl::tan_contig_dispatch_vector; + using impl::tan_output_typeid_vector; + using impl::tan_strided_dispatch_vector; + + auto tan_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, tan_output_typeid_vector, + tan_contig_dispatch_vector, tan_strided_dispatch_vector); + }; + m.def("_tan", tan_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto tan_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, tan_output_typeid_vector); + }; + m.def("_tan_result_type", tan_result_type_pyapi); + } + + // U35: ==== TANH (x) + { + impl::populate_tanh_dispatch_vectors(); + using impl::tanh_contig_dispatch_vector; + using impl::tanh_output_typeid_vector; + using impl::tanh_strided_dispatch_vector; + + auto tanh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, tanh_output_typeid_vector, + tanh_contig_dispatch_vector, tanh_strided_dispatch_vector); + }; + m.def("_tanh", tanh_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto tanh_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, tanh_output_typeid_vector); + }; + m.def("_tanh_result_type", tanh_result_type_pyapi); + } + + // U36: ==== TRUNC (x) + { + impl::populate_trunc_dispatch_vectors(); + using impl::trunc_contig_dispatch_vector; + using impl::trunc_output_typeid_vector; + using impl::trunc_strided_dispatch_vector; + + auto trunc_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) 
{ + return py_unary_ufunc( + src, dst, exec_q, depends, trunc_output_typeid_vector, + trunc_contig_dispatch_vector, trunc_strided_dispatch_vector); + }; + m.def("_trunc", trunc_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto trunc_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + trunc_output_typeid_vector); + }; + m.def("_trunc_result_type", trunc_result_type_pyapi); + } + + // U39: ==== RSQRT (x) + { + impl::populate_rsqrt_dispatch_vectors(); + using impl::rsqrt_contig_dispatch_vector; + using impl::rsqrt_output_typeid_vector; + using impl::rsqrt_strided_dispatch_vector; + + auto rsqrt_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, rsqrt_output_typeid_vector, + rsqrt_contig_dispatch_vector, rsqrt_strided_dispatch_vector); + }; + m.def("_rsqrt", rsqrt_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto rsqrt_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + rsqrt_output_typeid_vector); + }; + m.def("_rsqrt_result_type", rsqrt_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions1.hpp b/dpctl/tensor/libtensor/source/elementwise_functions1.hpp new file mode 100644 index 0000000000..9bc3976abf --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions1.hpp @@ -0,0 +1,40 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_elementwise_functions1(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions2.cpp similarity index 54% rename from dpctl/tensor/libtensor/source/elementwise_functions.cpp rename to dpctl/tensor/libtensor/source/elementwise_functions2.cpp index c42eea133f..4385a1e4c5 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions2.cpp @@ -31,6 +31,7 @@ #include #include "elementwise_functions.hpp" +#include "elementwise_functions2.hpp" #include "utils/type_dispatch.hpp" #include "kernels/elementwise_functions/abs.hpp" @@ -67,40 +68,7 @@ #include "kernels/elementwise_functions/isfinite.hpp" #include "kernels/elementwise_functions/isinf.hpp" #include "kernels/elementwise_functions/isnan.hpp" -#include "kernels/elementwise_functions/less.hpp" -#include "kernels/elementwise_functions/less_equal.hpp" -#include "kernels/elementwise_functions/log.hpp" -#include "kernels/elementwise_functions/log10.hpp" -#include 
"kernels/elementwise_functions/log1p.hpp" -#include "kernels/elementwise_functions/log2.hpp" -#include "kernels/elementwise_functions/logaddexp.hpp" -#include "kernels/elementwise_functions/logical_and.hpp" -#include "kernels/elementwise_functions/logical_not.hpp" -#include "kernels/elementwise_functions/logical_or.hpp" -#include "kernels/elementwise_functions/logical_xor.hpp" -#include "kernels/elementwise_functions/maximum.hpp" -#include "kernels/elementwise_functions/minimum.hpp" -#include "kernels/elementwise_functions/multiply.hpp" -#include "kernels/elementwise_functions/negative.hpp" -#include "kernels/elementwise_functions/not_equal.hpp" -#include "kernels/elementwise_functions/positive.hpp" -#include "kernels/elementwise_functions/pow.hpp" -#include "kernels/elementwise_functions/proj.hpp" -#include "kernels/elementwise_functions/real.hpp" -#include "kernels/elementwise_functions/remainder.hpp" -#include "kernels/elementwise_functions/round.hpp" -#include "kernels/elementwise_functions/rsqrt.hpp" -#include "kernels/elementwise_functions/sign.hpp" -#include "kernels/elementwise_functions/signbit.hpp" -#include "kernels/elementwise_functions/sin.hpp" -#include "kernels/elementwise_functions/sinh.hpp" -#include "kernels/elementwise_functions/sqrt.hpp" -#include "kernels/elementwise_functions/square.hpp" -#include "kernels/elementwise_functions/subtract.hpp" -#include "kernels/elementwise_functions/tan.hpp" -#include "kernels/elementwise_functions/tanh.hpp" #include "kernels/elementwise_functions/true_divide.hpp" -#include "kernels/elementwise_functions/trunc.hpp" namespace dpctl { @@ -111,52 +79,6 @@ namespace py_internal namespace td_ns = dpctl::tensor::type_dispatch; -py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t) -{ - switch (dst_typenum_t) { - case td_ns::typenum_t::BOOL: - return py::dtype("?"); - case td_ns::typenum_t::INT8: - return py::dtype("i1"); - case td_ns::typenum_t::UINT8: - return py::dtype("u1"); - case td_ns::typenum_t::INT16: 
- return py::dtype("i2"); - case td_ns::typenum_t::UINT16: - return py::dtype("u2"); - case td_ns::typenum_t::INT32: - return py::dtype("i4"); - case td_ns::typenum_t::UINT32: - return py::dtype("u4"); - case td_ns::typenum_t::INT64: - return py::dtype("i8"); - case td_ns::typenum_t::UINT64: - return py::dtype("u8"); - case td_ns::typenum_t::HALF: - return py::dtype("f2"); - case td_ns::typenum_t::FLOAT: - return py::dtype("f4"); - case td_ns::typenum_t::DOUBLE: - return py::dtype("f8"); - case td_ns::typenum_t::CFLOAT: - return py::dtype("c8"); - case td_ns::typenum_t::CDOUBLE: - return py::dtype("c16"); - default: - throw py::value_error("Unrecognized dst_typeid"); - } -} - -int _result_typeid(int arg_typeid, const int *fn_output_id) -{ - if (arg_typeid < 0 || arg_typeid >= td_ns::num_types) { - throw py::value_error("Input typeid " + std::to_string(arg_typeid) + - " is outside of expected bounds."); - } - - return fn_output_id[arg_typeid]; -} - namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; using ew_cmn_ns::binary_contig_impl_fn_ptr_t; using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; @@ -1566,2747 +1488,389 @@ void populate_isnan_dispatch_vectors(void) } // namespace impl -// B13: ==== LESS (x1, x2) -namespace impl -{ -namespace less_fn_ns = dpctl::tensor::kernels::less; - -static binary_contig_impl_fn_ptr_t less_contig_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static int less_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - less_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_less_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = less_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::LessTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(less_output_id_table); - - // function pointers for operation on general strided arrays - using 
fn_ns::LessStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(less_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::LessContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(less_contig_dispatch_table); -}; -} // namespace impl - -// B14: ==== LESS_EQUAL (x1, x2) +// B24: ==== HYPOT (x1, x2) namespace impl { -namespace less_equal_fn_ns = dpctl::tensor::kernels::less_equal; +namespace hypot_fn_ns = dpctl::tensor::kernels::hypot; static binary_contig_impl_fn_ptr_t - less_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int less_equal_output_id_table[td_ns::num_types][td_ns::num_types]; + hypot_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int hypot_output_id_table[td_ns::num_types][td_ns::num_types]; static binary_strided_impl_fn_ptr_t - less_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + hypot_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; -void populate_less_equal_dispatch_tables(void) +void populate_hypot_dispatch_tables(void) { using namespace td_ns; - namespace fn_ns = less_equal_fn_ns; + namespace fn_ns = hypot_fn_ns; // which input types are supported, and what is the type of the result - using fn_ns::LessEqualTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(less_equal_output_id_table); + using fn_ns::HypotTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(hypot_output_id_table); // function pointers for operation on general strided arrays - using fn_ns::LessEqualStridedFactory; - DispatchTableBuilder dtb2; - dtb2.populate_dispatch_table(less_equal_strided_dispatch_table); + dtb2.populate_dispatch_table(hypot_strided_dispatch_table); // function pointers for operation on contiguous inputs and output - using fn_ns::LessEqualContigFactory; - DispatchTableBuilder dtb3; - 
dtb3.populate_dispatch_table(less_equal_contig_dispatch_table); + dtb3.populate_dispatch_table(hypot_contig_dispatch_table); }; -} // namespace impl - -// U20: ==== LOG (x) -namespace impl -{ - -namespace log_fn_ns = dpctl::tensor::kernels::log; - -static unary_contig_impl_fn_ptr_t log_contig_dispatch_vector[td_ns::num_types]; -static int log_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - log_strided_dispatch_vector[td_ns::num_types]; - -void populate_log_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = log_fn_ns; - - using fn_ns::LogContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(log_contig_dispatch_vector); - - using fn_ns::LogStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(log_strided_dispatch_vector); - - using fn_ns::LogTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(log_output_typeid_vector); -} } // namespace impl -// U21: ==== LOG1P (x) +// U37: ==== CBRT (x) namespace impl { -namespace log1p_fn_ns = dpctl::tensor::kernels::log1p; +namespace cbrt_fn_ns = dpctl::tensor::kernels::cbrt; -static unary_contig_impl_fn_ptr_t - log1p_contig_dispatch_vector[td_ns::num_types]; -static int log1p_output_typeid_vector[td_ns::num_types]; +static unary_contig_impl_fn_ptr_t cbrt_contig_dispatch_vector[td_ns::num_types]; +static int cbrt_output_typeid_vector[td_ns::num_types]; static unary_strided_impl_fn_ptr_t - log1p_strided_dispatch_vector[td_ns::num_types]; + cbrt_strided_dispatch_vector[td_ns::num_types]; -void populate_log1p_dispatch_vectors(void) +void populate_cbrt_dispatch_vectors(void) { using namespace td_ns; - namespace fn_ns = log1p_fn_ns; + namespace fn_ns = cbrt_fn_ns; - using fn_ns::Log1pContigFactory; - DispatchVectorBuilder dvb1; - dvb1.populate_dispatch_vector(log1p_contig_dispatch_vector); + dvb1.populate_dispatch_vector(cbrt_contig_dispatch_vector); - using fn_ns::Log1pStridedFactory; - DispatchVectorBuilder 
dvb2; - dvb2.populate_dispatch_vector(log1p_strided_dispatch_vector); + dvb2.populate_dispatch_vector(cbrt_strided_dispatch_vector); - using fn_ns::Log1pTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(log1p_output_typeid_vector); + using fn_ns::CbrtTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(cbrt_output_typeid_vector); } } // namespace impl -// U22: ==== LOG2 (x) -namespace impl -{ - -namespace log2_fn_ns = dpctl::tensor::kernels::log2; - -static unary_contig_impl_fn_ptr_t log2_contig_dispatch_vector[td_ns::num_types]; -static int log2_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - log2_strided_dispatch_vector[td_ns::num_types]; - -void populate_log2_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = log2_fn_ns; - - using fn_ns::Log2ContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(log2_contig_dispatch_vector); - - using fn_ns::Log2StridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(log2_strided_dispatch_vector); - - using fn_ns::Log2TypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(log2_output_typeid_vector); -}; - -} // namespace impl - -// U23: ==== LOG10 (x) -namespace impl -{ - -namespace log10_fn_ns = dpctl::tensor::kernels::log10; - -static unary_contig_impl_fn_ptr_t - log10_contig_dispatch_vector[td_ns::num_types]; -static int log10_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - log10_strided_dispatch_vector[td_ns::num_types]; - -void populate_log10_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = log10_fn_ns; - - using fn_ns::Log10ContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(log10_contig_dispatch_vector); - - using fn_ns::Log10StridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(log10_strided_dispatch_vector); - - using fn_ns::Log10TypeMapFactory; 
- DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(log10_output_typeid_vector); -}; - -} // namespace impl - -// B15: ==== LOGADDEXP (x1, x2) +// B24: ==== COPYSIGN (x1, x2) namespace impl { -namespace logaddexp_fn_ns = dpctl::tensor::kernels::logaddexp; +namespace copysign_fn_ns = dpctl::tensor::kernels::copysign; static binary_contig_impl_fn_ptr_t - logaddexp_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int logaddexp_output_id_table[td_ns::num_types][td_ns::num_types]; + copysign_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int copysign_output_id_table[td_ns::num_types][td_ns::num_types]; static binary_strided_impl_fn_ptr_t - logaddexp_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + copysign_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; -void populate_logaddexp_dispatch_tables(void) +void populate_copysign_dispatch_tables(void) { using namespace td_ns; - namespace fn_ns = logaddexp_fn_ns; + namespace fn_ns = copysign_fn_ns; // which input types are supported, and what is the type of the result - using fn_ns::LogAddExpTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(logaddexp_output_id_table); + using fn_ns::CopysignTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(copysign_output_id_table); // function pointers for operation on general strided arrays - using fn_ns::LogAddExpStridedFactory; - DispatchTableBuilder dtb2; - dtb2.populate_dispatch_table(logaddexp_strided_dispatch_table); + dtb2.populate_dispatch_table(copysign_strided_dispatch_table); // function pointers for operation on contiguous inputs and output - using fn_ns::LogAddExpContigFactory; - DispatchTableBuilder dtb3; - dtb3.populate_dispatch_table(logaddexp_contig_dispatch_table); + dtb3.populate_dispatch_table(copysign_contig_dispatch_table); }; -} // namespace impl - -// B16: ==== LOGICAL_AND (x1, x2) -namespace impl -{ -namespace logical_and_fn_ns = 
dpctl::tensor::kernels::logical_and; - -static binary_contig_impl_fn_ptr_t - logical_and_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int logical_and_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - logical_and_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; -void populate_logical_and_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = logical_and_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::LogicalAndTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(logical_and_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::LogicalAndStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(logical_and_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::LogicalAndContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(logical_and_contig_dispatch_table); -}; } // namespace impl -// U24: ==== LOGICAL_NOT (x) +// U38: ==== EXP2 (x) namespace impl { -namespace logical_not_fn_ns = dpctl::tensor::kernels::logical_not; -static unary_contig_impl_fn_ptr_t - logical_not_contig_dispatch_vector[td_ns::num_types]; -static int logical_not_output_typeid_vector[td_ns::num_types]; +namespace exp2_fn_ns = dpctl::tensor::kernels::exp2; + +static unary_contig_impl_fn_ptr_t exp2_contig_dispatch_vector[td_ns::num_types]; +static int exp2_output_typeid_vector[td_ns::num_types]; static unary_strided_impl_fn_ptr_t - logical_not_strided_dispatch_vector[td_ns::num_types]; + exp2_strided_dispatch_vector[td_ns::num_types]; -void populate_logical_not_dispatch_vectors(void) +void populate_exp2_dispatch_vectors(void) { using namespace td_ns; - namespace fn_ns = logical_not_fn_ns; + namespace fn_ns = exp2_fn_ns; - using fn_ns::LogicalNotContigFactory; - DispatchVectorBuilder dvb1; - 
dvb1.populate_dispatch_vector(logical_not_contig_dispatch_vector); + dvb1.populate_dispatch_vector(exp2_contig_dispatch_vector); - using fn_ns::LogicalNotStridedFactory; - DispatchVectorBuilder dvb2; - dvb2.populate_dispatch_vector(logical_not_strided_dispatch_vector); + dvb2.populate_dispatch_vector(exp2_strided_dispatch_vector); - using fn_ns::LogicalNotTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(logical_not_output_typeid_vector); -}; -} // namespace impl + using fn_ns::Exp2TypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(exp2_output_typeid_vector); +} -// B17: ==== LOGICAL_OR (x1, x2) -namespace impl -{ -namespace logical_or_fn_ns = dpctl::tensor::kernels::logical_or; +} // namespace impl -static binary_contig_impl_fn_ptr_t - logical_or_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int logical_or_output_id_table[td_ns::num_types][td_ns::num_types]; +// ========================================================================================== +// // -static binary_strided_impl_fn_ptr_t - logical_or_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; +namespace py = pybind11; -void populate_logical_or_dispatch_tables(void) +void init_elementwise_functions2(py::module_ m) { - using namespace td_ns; - namespace fn_ns = logical_or_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::LogicalOrTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(logical_or_output_id_table); - // function pointers for operation on general strided arrays - using fn_ns::LogicalOrStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(logical_or_strided_dispatch_table); + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; - // function pointers for operation on contiguous inputs and output - using fn_ns::LogicalOrContigFactory; - DispatchTableBuilder - dtb3; - 
dtb3.populate_dispatch_table(logical_or_contig_dispatch_table); -}; -} // namespace impl + // U01: ==== ABS (x) + { + impl::populate_abs_dispatch_vectors(); + using impl::abs_contig_dispatch_vector; + using impl::abs_output_typeid_vector; + using impl::abs_strided_dispatch_vector; -// B18: ==== LOGICAL_XOR (x1, x2) -namespace impl -{ -namespace logical_xor_fn_ns = dpctl::tensor::kernels::logical_xor; + auto abs_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, abs_output_typeid_vector, + abs_contig_dispatch_vector, abs_strided_dispatch_vector); + }; + m.def("_abs", abs_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); -static binary_contig_impl_fn_ptr_t - logical_xor_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int logical_xor_output_id_table[td_ns::num_types][td_ns::num_types]; + auto abs_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, abs_output_typeid_vector); + }; + m.def("_abs_result_type", abs_result_type_pyapi); + } -static binary_strided_impl_fn_ptr_t - logical_xor_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + // U02: ==== ACOS (x) + { + impl::populate_acos_dispatch_vectors(); + using impl::acos_contig_dispatch_vector; + using impl::acos_output_typeid_vector; + using impl::acos_strided_dispatch_vector; -void populate_logical_xor_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = logical_xor_fn_ns; + auto acos_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, acos_output_typeid_vector, + acos_contig_dispatch_vector, acos_strided_dispatch_vector); + }; + m.def("_acos", acos_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); - // which 
input types are supported, and what is the type of the result - using fn_ns::LogicalXorTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(logical_xor_output_id_table); + auto acos_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, acos_output_typeid_vector); + }; + m.def("_acos_result_type", acos_result_type_pyapi); + } - // function pointers for operation on general strided arrays - using fn_ns::LogicalXorStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(logical_xor_strided_dispatch_table); + // U03: ===== ACOSH (x) + { + impl::populate_acosh_dispatch_vectors(); + using impl::acosh_contig_dispatch_vector; + using impl::acosh_output_typeid_vector; + using impl::acosh_strided_dispatch_vector; - // function pointers for operation on contiguous inputs and output - using fn_ns::LogicalXorContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(logical_xor_contig_dispatch_table); -}; -} // namespace impl - -// B??: ==== MAXIMUM (x1, x2) -namespace impl -{ - -namespace maximum_fn_ns = dpctl::tensor::kernels::maximum; - -static binary_contig_impl_fn_ptr_t - maximum_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int maximum_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - maximum_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_maximum_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = maximum_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::MaximumTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(maximum_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::MaximumStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(maximum_strided_dispatch_table); - - // function pointers for operation on contiguous inputs 
and output - using fn_ns::MaximumContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(maximum_contig_dispatch_table); -}; - -} // namespace impl - -// B??: ==== MINIMUM (x1, x2) -namespace impl -{ - -namespace minimum_fn_ns = dpctl::tensor::kernels::minimum; - -static binary_contig_impl_fn_ptr_t - minimum_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int minimum_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - minimum_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_minimum_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = minimum_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::MinimumTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(minimum_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::MinimumStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(minimum_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::MinimumContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(minimum_contig_dispatch_table); -}; - -} // namespace impl - -// B19: ==== MULTIPLY (x1, x2) -namespace impl -{ - -namespace multiply_fn_ns = dpctl::tensor::kernels::multiply; - -static binary_contig_impl_fn_ptr_t - multiply_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int multiply_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - multiply_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -// mul(matrix, row) -static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t - multiply_contig_matrix_contig_row_broadcast_dispatch_table - [td_ns::num_types][td_ns::num_types]; - -// mul(row, matrix) -static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t - 
multiply_contig_row_contig_matrix_broadcast_dispatch_table - [td_ns::num_types][td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - multiply_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - multiply_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; -static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t - multiply_inplace_row_matrix_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -void populate_multiply_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = multiply_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::MultiplyTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(multiply_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::MultiplyStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(multiply_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::MultiplyContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(multiply_contig_dispatch_table); - - // function pointers for operation on contiguous matrix, contiguous row - // with contiguous matrix output - using fn_ns::MultiplyContigMatrixContigRowBroadcastFactory; - DispatchTableBuilder< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, - MultiplyContigMatrixContigRowBroadcastFactory, num_types> - dtb4; - dtb4.populate_dispatch_table( - multiply_contig_matrix_contig_row_broadcast_dispatch_table); - - // function pointers for operation on contiguous row, contiguous matrix - // with contiguous matrix output - using fn_ns::MultiplyContigRowContigMatrixBroadcastFactory; - DispatchTableBuilder< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, - MultiplyContigRowContigMatrixBroadcastFactory, num_types> - dtb5; - dtb5.populate_dispatch_table( - 
multiply_contig_row_contig_matrix_broadcast_dispatch_table); - - // function pointers for inplace operation on general strided arrays - using fn_ns::MultiplyInplaceStridedFactory; - DispatchTableBuilder - dtb6; - dtb6.populate_dispatch_table(multiply_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::MultiplyInplaceContigFactory; - DispatchTableBuilder - dtb7; - dtb7.populate_dispatch_table(multiply_inplace_contig_dispatch_table); - - // function pointers for inplace operation on contiguous matrix - // and contiguous row - using fn_ns::MultiplyInplaceRowMatrixBroadcastFactory; - DispatchTableBuilder - dtb8; - dtb8.populate_dispatch_table(multiply_inplace_row_matrix_dispatch_table); -}; - -} // namespace impl - -// U25: ==== NEGATIVE (x) -namespace impl -{ - -namespace negative_fn_ns = dpctl::tensor::kernels::negative; - -static unary_contig_impl_fn_ptr_t - negative_contig_dispatch_vector[td_ns::num_types]; -static int negative_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - negative_strided_dispatch_vector[td_ns::num_types]; - -void populate_negative_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = negative_fn_ns; - - using fn_ns::NegativeContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(negative_contig_dispatch_vector); - - using fn_ns::NegativeStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(negative_strided_dispatch_vector); - - using fn_ns::NegativeTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(negative_output_typeid_vector); -} - -} // namespace impl - -// B20: ==== NOT_EQUAL (x1, x2) -namespace impl -{ -namespace not_equal_fn_ns = dpctl::tensor::kernels::not_equal; - -static binary_contig_impl_fn_ptr_t - not_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int 
not_equal_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - not_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_not_equal_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = not_equal_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::NotEqualTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(not_equal_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::NotEqualStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(not_equal_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::NotEqualContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(not_equal_contig_dispatch_table); -}; -} // namespace impl - -// U26: ==== POSITIVE (x) -namespace impl -{ - -namespace positive_fn_ns = dpctl::tensor::kernels::positive; - -static unary_contig_impl_fn_ptr_t - positive_contig_dispatch_vector[td_ns::num_types]; -static int positive_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - positive_strided_dispatch_vector[td_ns::num_types]; - -void populate_positive_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = positive_fn_ns; - - using fn_ns::PositiveContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(positive_contig_dispatch_vector); - - using fn_ns::PositiveStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(positive_strided_dispatch_vector); - - using fn_ns::PositiveTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(positive_output_typeid_vector); -} - -} // namespace impl - -// B21: ==== POW (x1, x2) -namespace impl -{ - -namespace pow_fn_ns = dpctl::tensor::kernels::pow; - -static binary_contig_impl_fn_ptr_t 
pow_contig_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static int pow_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - pow_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - pow_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - pow_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_pow_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = pow_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::PowTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(pow_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::PowStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(pow_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::PowContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(pow_contig_dispatch_table); - - // function pointers for inplace operation on general strided arrays - using fn_ns::PowInplaceStridedFactory; - DispatchTableBuilder - dtb4; - dtb4.populate_dispatch_table(pow_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::PowInplaceContigFactory; - DispatchTableBuilder - dtb5; - dtb5.populate_dispatch_table(pow_inplace_contig_dispatch_table); -}; - -} // namespace impl - -// U??: ==== PROJ (x) -namespace impl -{ - -namespace proj_fn_ns = dpctl::tensor::kernels::proj; - -static unary_contig_impl_fn_ptr_t proj_contig_dispatch_vector[td_ns::num_types]; -static int proj_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - proj_strided_dispatch_vector[td_ns::num_types]; - -void populate_proj_dispatch_vectors(void) -{ - using 
namespace td_ns; - namespace fn_ns = proj_fn_ns; - - using fn_ns::ProjContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(proj_contig_dispatch_vector); - - using fn_ns::ProjStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(proj_strided_dispatch_vector); - - using fn_ns::ProjTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(proj_output_typeid_vector); -} -} // namespace impl - -// U27: ==== REAL (x) -namespace impl -{ - -namespace real_fn_ns = dpctl::tensor::kernels::real; - -static unary_contig_impl_fn_ptr_t real_contig_dispatch_vector[td_ns::num_types]; -static int real_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - real_strided_dispatch_vector[td_ns::num_types]; - -void populate_real_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = real_fn_ns; - - using fn_ns::RealContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(real_contig_dispatch_vector); - - using fn_ns::RealStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(real_strided_dispatch_vector); - - using fn_ns::RealTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(real_output_typeid_vector); -} -} // namespace impl - -// B22: ==== REMAINDER (x1, x2) -namespace impl -{ - -namespace remainder_fn_ns = dpctl::tensor::kernels::remainder; - -static binary_contig_impl_fn_ptr_t - remainder_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int remainder_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - remainder_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - remainder_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - remainder_inplace_strided_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -void 
populate_remainder_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = remainder_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::RemainderTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(remainder_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::RemainderStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(remainder_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::RemainderContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(remainder_contig_dispatch_table); - - // function pointers for inplace operation on general strided arrays - using fn_ns::RemainderInplaceStridedFactory; - DispatchTableBuilder - dtb4; - dtb4.populate_dispatch_table(remainder_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::RemainderInplaceContigFactory; - DispatchTableBuilder - dtb5; - dtb5.populate_dispatch_table(remainder_inplace_contig_dispatch_table); -} - -} // namespace impl - -// U28: ==== ROUND (x) -namespace impl -{ - -namespace round_fn_ns = dpctl::tensor::kernels::round; - -static unary_contig_impl_fn_ptr_t - round_contig_dispatch_vector[td_ns::num_types]; -static int round_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - round_strided_dispatch_vector[td_ns::num_types]; - -void populate_round_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = round_fn_ns; - - using fn_ns::RoundContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(round_contig_dispatch_vector); - - using fn_ns::RoundStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(round_strided_dispatch_vector); - - using fn_ns::RoundTypeMapFactory; - DispatchVectorBuilder dvb3; - 
dvb3.populate_dispatch_vector(round_output_typeid_vector); -} - -} // namespace impl - -// U29: ==== SIGN (x) -namespace impl -{ - -namespace sign_fn_ns = dpctl::tensor::kernels::sign; - -static unary_contig_impl_fn_ptr_t sign_contig_dispatch_vector[td_ns::num_types]; -static int sign_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - sign_strided_dispatch_vector[td_ns::num_types]; - -void populate_sign_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = sign_fn_ns; - - using fn_ns::SignContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(sign_contig_dispatch_vector); - - using fn_ns::SignStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(sign_strided_dispatch_vector); - - using fn_ns::SignTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(sign_output_typeid_vector); -} - -} // namespace impl - -// ==== SIGNBIT (x) -namespace impl -{ - -namespace signbit_fn_ns = dpctl::tensor::kernels::signbit; - -static unary_contig_impl_fn_ptr_t - signbit_contig_dispatch_vector[td_ns::num_types]; -static int signbit_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - signbit_strided_dispatch_vector[td_ns::num_types]; - -void populate_signbit_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = signbit_fn_ns; - - using fn_ns::SignbitContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(signbit_contig_dispatch_vector); - - using fn_ns::SignbitStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(signbit_strided_dispatch_vector); - - using fn_ns::SignbitTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(signbit_output_typeid_vector); -} - -} // namespace impl - -// U30: ==== SIN (x) -namespace impl -{ - -namespace sin_fn_ns = dpctl::tensor::kernels::sin; - -static unary_contig_impl_fn_ptr_t sin_contig_dispatch_vector[td_ns::num_types]; 
-static int sin_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - sin_strided_dispatch_vector[td_ns::num_types]; - -void populate_sin_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = sin_fn_ns; - - using fn_ns::SinContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(sin_contig_dispatch_vector); - - using fn_ns::SinStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(sin_strided_dispatch_vector); - - using fn_ns::SinTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(sin_output_typeid_vector); -} - -} // namespace impl - -// U31: ==== SINH (x) -namespace impl -{ - -namespace sinh_fn_ns = dpctl::tensor::kernels::sinh; - -static unary_contig_impl_fn_ptr_t sinh_contig_dispatch_vector[td_ns::num_types]; -static int sinh_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - sinh_strided_dispatch_vector[td_ns::num_types]; - -void populate_sinh_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = sinh_fn_ns; - - using fn_ns::SinhContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(sinh_contig_dispatch_vector); - - using fn_ns::SinhStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(sinh_strided_dispatch_vector); - - using fn_ns::SinhTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(sinh_output_typeid_vector); -} - -} // namespace impl - -// U32: ==== SQUARE (x) -namespace impl -{ - -namespace square_fn_ns = dpctl::tensor::kernels::square; - -static unary_contig_impl_fn_ptr_t - square_contig_dispatch_vector[td_ns::num_types]; -static int square_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - square_strided_dispatch_vector[td_ns::num_types]; - -void populate_square_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = square_fn_ns; - - using fn_ns::SquareContigFactory; - 
DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(square_contig_dispatch_vector); - - using fn_ns::SquareStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(square_strided_dispatch_vector); - - using fn_ns::SquareTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(square_output_typeid_vector); -} - -} // namespace impl - -// U33: ==== SQRT (x) -namespace impl -{ - -namespace sqrt_fn_ns = dpctl::tensor::kernels::sqrt; - -static unary_contig_impl_fn_ptr_t sqrt_contig_dispatch_vector[td_ns::num_types]; -static int sqrt_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - sqrt_strided_dispatch_vector[td_ns::num_types]; - -void populate_sqrt_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = sqrt_fn_ns; - - using fn_ns::SqrtContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(sqrt_contig_dispatch_vector); - - using fn_ns::SqrtStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(sqrt_strided_dispatch_vector); - - using fn_ns::SqrtTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(sqrt_output_typeid_vector); -} - -} // namespace impl - -// B23: ==== SUBTRACT (x1, x2) -namespace impl -{ -namespace subtract_fn_ns = dpctl::tensor::kernels::subtract; - -static binary_contig_impl_fn_ptr_t - subtract_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int subtract_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - subtract_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -// sub(matrix, row) -static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t - subtract_contig_matrix_contig_row_broadcast_dispatch_table - [td_ns::num_types][td_ns::num_types]; - -// sub(row, matrix) -static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t - subtract_contig_row_contig_matrix_broadcast_dispatch_table - 
[td_ns::num_types][td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - subtract_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - subtract_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; -static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t - subtract_inplace_row_matrix_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -void populate_subtract_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = subtract_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::SubtractTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(subtract_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::SubtractStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(subtract_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::SubtractContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(subtract_contig_dispatch_table); - - // function pointers for operation on contiguous matrix, contiguous row - // with contiguous matrix output - using fn_ns::SubtractContigMatrixContigRowBroadcastFactory; - DispatchTableBuilder< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, - SubtractContigMatrixContigRowBroadcastFactory, num_types> - dtb4; - dtb4.populate_dispatch_table( - subtract_contig_matrix_contig_row_broadcast_dispatch_table); - - // function pointers for operation on contiguous row, contiguous matrix - // with contiguous matrix output - using fn_ns::SubtractContigRowContigMatrixBroadcastFactory; - DispatchTableBuilder< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, - SubtractContigRowContigMatrixBroadcastFactory, num_types> - dtb5; - dtb5.populate_dispatch_table( - subtract_contig_row_contig_matrix_broadcast_dispatch_table); - - // function 
pointers for inplace operation on general strided arrays - using fn_ns::SubtractInplaceStridedFactory; - DispatchTableBuilder - dtb6; - dtb6.populate_dispatch_table(subtract_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::SubtractInplaceContigFactory; - DispatchTableBuilder - dtb7; - dtb7.populate_dispatch_table(subtract_inplace_contig_dispatch_table); - - // function pointers for inplace operation on contiguous matrix - // and contiguous row - using fn_ns::SubtractInplaceRowMatrixBroadcastFactory; - DispatchTableBuilder - dtb8; - dtb8.populate_dispatch_table(subtract_inplace_row_matrix_dispatch_table); -}; - -} // namespace impl - -// U34: ==== TAN (x) -namespace impl -{ - -namespace tan_fn_ns = dpctl::tensor::kernels::tan; - -static unary_contig_impl_fn_ptr_t tan_contig_dispatch_vector[td_ns::num_types]; -static int tan_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - tan_strided_dispatch_vector[td_ns::num_types]; - -void populate_tan_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = tan_fn_ns; - - using fn_ns::TanContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(tan_contig_dispatch_vector); - - using fn_ns::TanStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(tan_strided_dispatch_vector); - - using fn_ns::TanTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(tan_output_typeid_vector); -} - -} // namespace impl - -// U35: ==== TANH (x) -namespace impl -{ - -namespace tanh_fn_ns = dpctl::tensor::kernels::tanh; - -static unary_contig_impl_fn_ptr_t tanh_contig_dispatch_vector[td_ns::num_types]; -static int tanh_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - tanh_strided_dispatch_vector[td_ns::num_types]; - -void populate_tanh_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = tanh_fn_ns; - - using 
fn_ns::TanhContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(tanh_contig_dispatch_vector); - - using fn_ns::TanhStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(tanh_strided_dispatch_vector); - - using fn_ns::TanhTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(tanh_output_typeid_vector); -} - -} // namespace impl - -// U36: ==== TRUNC (x) -namespace impl -{ - -namespace trunc_fn_ns = dpctl::tensor::kernels::trunc; - -static unary_contig_impl_fn_ptr_t - trunc_contig_dispatch_vector[td_ns::num_types]; -static int trunc_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - trunc_strided_dispatch_vector[td_ns::num_types]; - -void populate_trunc_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = trunc_fn_ns; - - using fn_ns::TruncContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(trunc_contig_dispatch_vector); - - using fn_ns::TruncStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(trunc_strided_dispatch_vector); - - using fn_ns::TruncTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(trunc_output_typeid_vector); -} - -} // namespace impl - -// B24: ==== HYPOT (x1, x2) -namespace impl -{ -namespace hypot_fn_ns = dpctl::tensor::kernels::hypot; - -static binary_contig_impl_fn_ptr_t - hypot_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int hypot_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - hypot_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_hypot_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = hypot_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::HypotTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(hypot_output_id_table); - - // function pointers for operation on 
general strided arrays - using fn_ns::HypotStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(hypot_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::HypotContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(hypot_contig_dispatch_table); -}; - -} // namespace impl - -// U37: ==== CBRT (x) -namespace impl -{ - -namespace cbrt_fn_ns = dpctl::tensor::kernels::cbrt; - -static unary_contig_impl_fn_ptr_t cbrt_contig_dispatch_vector[td_ns::num_types]; -static int cbrt_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - cbrt_strided_dispatch_vector[td_ns::num_types]; - -void populate_cbrt_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = cbrt_fn_ns; - - using fn_ns::CbrtContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(cbrt_contig_dispatch_vector); - - using fn_ns::CbrtStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(cbrt_strided_dispatch_vector); - - using fn_ns::CbrtTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(cbrt_output_typeid_vector); -} - -} // namespace impl - -// B24: ==== COPYSIGN (x1, x2) -namespace impl -{ -namespace copysign_fn_ns = dpctl::tensor::kernels::copysign; - -static binary_contig_impl_fn_ptr_t - copysign_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int copysign_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - copysign_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_copysign_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = copysign_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::CopysignTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(copysign_output_id_table); - - // function pointers for operation on general 
strided arrays - using fn_ns::CopysignStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(copysign_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::CopysignContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(copysign_contig_dispatch_table); -}; - -} // namespace impl - -// U38: ==== EXP2 (x) -namespace impl -{ - -namespace exp2_fn_ns = dpctl::tensor::kernels::exp2; - -static unary_contig_impl_fn_ptr_t exp2_contig_dispatch_vector[td_ns::num_types]; -static int exp2_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - exp2_strided_dispatch_vector[td_ns::num_types]; - -void populate_exp2_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = exp2_fn_ns; - - using fn_ns::Exp2ContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(exp2_contig_dispatch_vector); - - using fn_ns::Exp2StridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(exp2_strided_dispatch_vector); - - using fn_ns::Exp2TypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(exp2_output_typeid_vector); -} - -} // namespace impl - -// U39: ==== RSQRT (x) -namespace impl -{ - -namespace rsqrt_fn_ns = dpctl::tensor::kernels::rsqrt; - -static unary_contig_impl_fn_ptr_t - rsqrt_contig_dispatch_vector[td_ns::num_types]; -static int rsqrt_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - rsqrt_strided_dispatch_vector[td_ns::num_types]; - -void populate_rsqrt_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = rsqrt_fn_ns; - - using fn_ns::RsqrtContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(rsqrt_contig_dispatch_vector); - - using fn_ns::RsqrtStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(rsqrt_strided_dispatch_vector); - - using fn_ns::RsqrtTypeMapFactory; - DispatchVectorBuilder 
dvb3; - dvb3.populate_dispatch_vector(rsqrt_output_typeid_vector); -} - -} // namespace impl - -// ========================================================================================== -// // - -namespace py = pybind11; - -void init_elementwise_functions(py::module_ m) -{ - using arrayT = dpctl::tensor::usm_ndarray; - using event_vecT = std::vector; - - // U01: ==== ABS (x) - { - impl::populate_abs_dispatch_vectors(); - using impl::abs_contig_dispatch_vector; - using impl::abs_output_typeid_vector; - using impl::abs_strided_dispatch_vector; - - auto abs_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, abs_output_typeid_vector, - abs_contig_dispatch_vector, abs_strided_dispatch_vector); - }; - m.def("_abs", abs_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto abs_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, abs_output_typeid_vector); - }; - m.def("_abs_result_type", abs_result_type_pyapi); - } - - // U02: ==== ACOS (x) - { - impl::populate_acos_dispatch_vectors(); - using impl::acos_contig_dispatch_vector; - using impl::acos_output_typeid_vector; - using impl::acos_strided_dispatch_vector; - - auto acos_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, acos_output_typeid_vector, - acos_contig_dispatch_vector, acos_strided_dispatch_vector); - }; - m.def("_acos", acos_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto acos_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, acos_output_typeid_vector); - }; - m.def("_acos_result_type", acos_result_type_pyapi); - } - - // U03: ===== ACOSH (x) - { - 
impl::populate_acosh_dispatch_vectors(); - using impl::acosh_contig_dispatch_vector; - using impl::acosh_output_typeid_vector; - using impl::acosh_strided_dispatch_vector; - - auto acosh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, acosh_output_typeid_vector, - acosh_contig_dispatch_vector, acosh_strided_dispatch_vector); - }; - m.def("_acosh", acosh_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto acosh_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - acosh_output_typeid_vector); - }; - m.def("_acosh_result_type", acosh_result_type_pyapi); - } - - // B01: ===== ADD (x1, x2) - { - impl::populate_add_dispatch_tables(); - using impl::add_contig_dispatch_table; - using impl::add_contig_matrix_contig_row_broadcast_dispatch_table; - using impl::add_contig_row_contig_matrix_broadcast_dispatch_table; - using impl::add_output_id_table; - using impl::add_strided_dispatch_table; - - auto add_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, add_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - add_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - add_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - add_contig_matrix_contig_row_broadcast_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - add_contig_row_contig_matrix_broadcast_dispatch_table); - }; - 
auto add_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - add_output_id_table); - }; - m.def("_add", add_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_add_result_type", add_result_type_pyapi, ""); - - using impl::add_inplace_contig_dispatch_table; - using impl::add_inplace_row_matrix_dispatch_table; - using impl::add_inplace_strided_dispatch_table; - - auto add_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, add_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - add_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - add_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - add_inplace_row_matrix_dispatch_table); - }; - m.def("_add_inplace", add_inplace_pyapi, "", py::arg("lhs"), - py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // U04: ===== ASIN (x) - { - impl::populate_asin_dispatch_vectors(); - using impl::asin_contig_dispatch_vector; - using impl::asin_output_typeid_vector; - using impl::asin_strided_dispatch_vector; - - auto asin_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, asin_output_typeid_vector, - asin_contig_dispatch_vector, asin_strided_dispatch_vector); - }; - m.def("_asin", asin_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto 
asin_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, asin_output_typeid_vector); - }; - m.def("_asin_result_type", asin_result_type_pyapi); - } - - // U05: ===== ASINH (x) - { - impl::populate_asinh_dispatch_vectors(); - using impl::asinh_contig_dispatch_vector; - using impl::asinh_output_typeid_vector; - using impl::asinh_strided_dispatch_vector; - - auto asinh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, asinh_output_typeid_vector, - asinh_contig_dispatch_vector, asinh_strided_dispatch_vector); - }; - m.def("_asinh", asinh_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto asinh_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - asinh_output_typeid_vector); - }; - m.def("_asinh_result_type", asinh_result_type_pyapi); - } - - // U06: ===== ATAN (x) - { - impl::populate_atan_dispatch_vectors(); - using impl::atan_contig_dispatch_vector; - using impl::atan_output_typeid_vector; - using impl::atan_strided_dispatch_vector; - - auto atan_pyapi = [&](arrayT src, arrayT dst, sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, atan_output_typeid_vector, - atan_contig_dispatch_vector, atan_strided_dispatch_vector); - }; - m.def("_atan", atan_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto atan_result_type_pyapi = [&](py::dtype dtype) { - return py_unary_ufunc_result_type(dtype, atan_output_typeid_vector); - }; - m.def("_atan_result_type", atan_result_type_pyapi); - } - - // B02: ===== ATAN2 (x1, x2) - { - impl::populate_atan2_dispatch_tables(); - using impl::atan2_contig_dispatch_table; - using impl::atan2_output_id_table; - using impl::atan2_strided_dispatch_table; - - auto atan2_pyapi = 
[&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, atan2_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - atan2_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - atan2_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto atan2_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - atan2_output_id_table); - }; - m.def("_atan2", atan2_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_atan2_result_type", atan2_result_type_pyapi, ""); - } - - // U07: ===== ATANH (x) - { - impl::populate_atanh_dispatch_vectors(); - using impl::atanh_contig_dispatch_vector; - using impl::atanh_output_typeid_vector; - using impl::atanh_strided_dispatch_vector; - - auto atanh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, atanh_output_typeid_vector, - atanh_contig_dispatch_vector, atanh_strided_dispatch_vector); - }; - m.def("_atanh", atanh_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto atanh_result_type_pyapi = [&](const py::dtype &dtype) { - return 
py_unary_ufunc_result_type(dtype, - atanh_output_typeid_vector); - }; - m.def("_atanh_result_type", atanh_result_type_pyapi); - } - - // B03: ===== BITWISE_AND (x1, x2) - { - impl::populate_bitwise_and_dispatch_tables(); - using impl::bitwise_and_contig_dispatch_table; - using impl::bitwise_and_output_id_table; - using impl::bitwise_and_strided_dispatch_table; - - auto bitwise_and_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, bitwise_and_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - bitwise_and_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - bitwise_and_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto bitwise_and_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - bitwise_and_output_id_table); - }; - m.def("_bitwise_and", bitwise_and_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_bitwise_and_result_type", bitwise_and_result_type_pyapi, ""); - - using impl::bitwise_and_inplace_contig_dispatch_table; - using impl::bitwise_and_inplace_strided_dispatch_table; - - auto bitwise_and_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, 
sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, bitwise_and_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - bitwise_and_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - bitwise_and_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_bitwise_and_inplace", bitwise_and_inplace_pyapi, "", - py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // B04: ===== BITWISE_LEFT_SHIFT (x1, x2) - { - impl::populate_bitwise_left_shift_dispatch_tables(); - using impl::bitwise_left_shift_contig_dispatch_table; - using impl::bitwise_left_shift_output_id_table; - using impl::bitwise_left_shift_strided_dispatch_table; - - auto bitwise_left_shift_pyapi = [&](const dpctl::tensor::usm_ndarray - &src1, - const dpctl::tensor::usm_ndarray - &src2, - const dpctl::tensor::usm_ndarray - &dst, - sycl::queue &exec_q, - const std::vector - &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, - bitwise_left_shift_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - bitwise_left_shift_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - bitwise_left_shift_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be 
nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto bitwise_left_shift_result_type_pyapi = - [&](const py::dtype &dtype1, const py::dtype &dtype2) { - return py_binary_ufunc_result_type( - dtype1, dtype2, bitwise_left_shift_output_id_table); - }; - m.def("_bitwise_left_shift", bitwise_left_shift_pyapi, "", - py::arg("src1"), py::arg("src2"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_bitwise_left_shift_result_type", - bitwise_left_shift_result_type_pyapi, ""); - - using impl::bitwise_left_shift_inplace_contig_dispatch_table; - using impl::bitwise_left_shift_inplace_strided_dispatch_table; - - auto bitwise_left_shift_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, - bitwise_left_shift_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - bitwise_left_shift_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - bitwise_left_shift_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_bitwise_left_shift_inplace", bitwise_left_shift_inplace_pyapi, - "", py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // U08: ===== BITWISE_INVERT (x) - { - impl::populate_bitwise_invert_dispatch_vectors(); - using impl::bitwise_invert_contig_dispatch_vector; - using impl::bitwise_invert_output_typeid_vector; - using impl::bitwise_invert_strided_dispatch_vector; - - auto bitwise_invert_pyapi = [&](const arrayT &src, const arrayT 
&dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, - bitwise_invert_output_typeid_vector, - bitwise_invert_contig_dispatch_vector, - bitwise_invert_strided_dispatch_vector); - }; - m.def("_bitwise_invert", bitwise_invert_pyapi, "", py::arg("src"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - - auto bitwise_invert_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type( - dtype, bitwise_invert_output_typeid_vector); - }; - m.def("_bitwise_invert_result_type", bitwise_invert_result_type_pyapi); - } - - // B05: ===== BITWISE_OR (x1, x2) - { - impl::populate_bitwise_or_dispatch_tables(); - using impl::bitwise_or_contig_dispatch_table; - using impl::bitwise_or_output_id_table; - using impl::bitwise_or_strided_dispatch_table; - - auto bitwise_or_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, bitwise_or_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - bitwise_or_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - bitwise_or_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto bitwise_or_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - 
bitwise_or_output_id_table); - }; - m.def("_bitwise_or", bitwise_or_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_bitwise_or_result_type", bitwise_or_result_type_pyapi, ""); - - using impl::bitwise_or_inplace_contig_dispatch_table; - using impl::bitwise_or_inplace_strided_dispatch_table; - - auto bitwise_or_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, bitwise_or_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - bitwise_or_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - bitwise_or_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_bitwise_or_inplace", bitwise_or_inplace_pyapi, "", - py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // B06: ===== BITWISE_RIGHT_SHIFT (x1, x2) - { - impl::populate_bitwise_right_shift_dispatch_tables(); - using impl::bitwise_right_shift_contig_dispatch_table; - using impl::bitwise_right_shift_output_id_table; - using impl::bitwise_right_shift_strided_dispatch_table; - - auto bitwise_right_shift_pyapi = [&](const dpctl::tensor::usm_ndarray - &src1, - const dpctl::tensor::usm_ndarray - &src2, - const dpctl::tensor::usm_ndarray - &dst, - sycl::queue &exec_q, - const std::vector - &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, - bitwise_right_shift_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be 
nullptr) - bitwise_right_shift_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - bitwise_right_shift_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto bitwise_right_shift_result_type_pyapi = - [&](const py::dtype &dtype1, const py::dtype &dtype2) { - return py_binary_ufunc_result_type( - dtype1, dtype2, bitwise_right_shift_output_id_table); - }; - m.def("_bitwise_right_shift", bitwise_right_shift_pyapi, "", - py::arg("src1"), py::arg("src2"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_bitwise_right_shift_result_type", - bitwise_right_shift_result_type_pyapi, ""); - - using impl::bitwise_right_shift_inplace_contig_dispatch_table; - using impl::bitwise_right_shift_inplace_strided_dispatch_table; - - auto bitwise_right_shift_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, - bitwise_right_shift_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - bitwise_right_shift_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - bitwise_right_shift_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - 
binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_bitwise_right_shift_inplace", bitwise_right_shift_inplace_pyapi, - "", py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // B07: ===== BITWISE_XOR (x1, x2) - { - impl::populate_bitwise_xor_dispatch_tables(); - using impl::bitwise_xor_contig_dispatch_table; - using impl::bitwise_xor_output_id_table; - using impl::bitwise_xor_strided_dispatch_table; - - auto bitwise_xor_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, bitwise_xor_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - bitwise_xor_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - bitwise_xor_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto bitwise_xor_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - bitwise_xor_output_id_table); - }; - m.def("_bitwise_xor", bitwise_xor_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_bitwise_xor_result_type", bitwise_xor_result_type_pyapi, ""); - - using impl::bitwise_xor_inplace_contig_dispatch_table; - using impl::bitwise_xor_inplace_strided_dispatch_table; - - auto 
bitwise_xor_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, bitwise_xor_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - bitwise_xor_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - bitwise_xor_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_bitwise_xor_inplace", bitwise_xor_inplace_pyapi, "", - py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // U09: ==== CEIL (x) - { - impl::populate_ceil_dispatch_vectors(); - using impl::ceil_contig_dispatch_vector; - using impl::ceil_output_typeid_vector; - using impl::ceil_strided_dispatch_vector; - - auto ceil_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, ceil_output_typeid_vector, - ceil_contig_dispatch_vector, ceil_strided_dispatch_vector); - }; - m.def("_ceil", ceil_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto ceil_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, ceil_output_typeid_vector); - }; - m.def("_ceil_result_type", ceil_result_type_pyapi); - } - - // U10: ==== CONJ (x) - { - impl::populate_conj_dispatch_vectors(); - using impl::conj_contig_dispatch_vector; - using impl::conj_output_typeid_vector; - using impl::conj_strided_dispatch_vector; - - auto conj_pyapi = [&](const arrayT &src, const arrayT &dst, - 
sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, conj_output_typeid_vector, - conj_contig_dispatch_vector, conj_strided_dispatch_vector); - }; - m.def("_conj", conj_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto conj_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, conj_output_typeid_vector); - }; - m.def("_conj_result_type", conj_result_type_pyapi); - } - - // U11: ==== COS (x) - { - impl::populate_cos_dispatch_vectors(); - using impl::cos_contig_dispatch_vector; - using impl::cos_output_typeid_vector; - using impl::cos_strided_dispatch_vector; - - auto cos_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, cos_output_typeid_vector, - cos_contig_dispatch_vector, cos_strided_dispatch_vector); - }; - m.def("_cos", cos_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto cos_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, cos_output_typeid_vector); - }; - m.def("_cos_result_type", cos_result_type_pyapi); - } - - // U12: ==== COSH (x) - { - impl::populate_cosh_dispatch_vectors(); - using impl::cosh_contig_dispatch_vector; - using impl::cosh_output_typeid_vector; - using impl::cosh_strided_dispatch_vector; - - auto cosh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, cosh_output_typeid_vector, - cosh_contig_dispatch_vector, cosh_strided_dispatch_vector); - }; - m.def("_cosh", cosh_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto cosh_result_type_pyapi = [&](const py::dtype &dtype) { - return 
py_unary_ufunc_result_type(dtype, cosh_output_typeid_vector); - }; - m.def("_cosh_result_type", cosh_result_type_pyapi); - } - - // B08: ==== DIVIDE (x1, x2) - { - impl::populate_true_divide_dispatch_tables(); - using impl::true_divide_contig_dispatch_table; - using impl:: - true_divide_contig_matrix_contig_row_broadcast_dispatch_table; - using impl:: - true_divide_contig_row_contig_matrix_broadcast_dispatch_table; - using impl::true_divide_output_id_table; - using impl::true_divide_strided_dispatch_table; - - auto divide_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, true_divide_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - true_divide_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - true_divide_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - true_divide_contig_matrix_contig_row_broadcast_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - true_divide_contig_row_contig_matrix_broadcast_dispatch_table); - }; - auto divide_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - true_divide_output_id_table); - }; - m.def("_divide", divide_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_divide_result_type", divide_result_type_pyapi, ""); - - using impl::true_divide_inplace_contig_dispatch_table; - using impl::true_divide_inplace_output_id_table; - using 
impl::true_divide_inplace_row_matrix_dispatch_table; - using impl::true_divide_inplace_strided_dispatch_table; - - auto divide_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, - true_divide_inplace_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - true_divide_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - true_divide_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - true_divide_inplace_row_matrix_dispatch_table); - }; - m.def("_divide_inplace", divide_inplace_pyapi, "", py::arg("lhs"), - py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // B09: ==== EQUAL (x1, x2) - { - impl::populate_equal_dispatch_tables(); - using impl::equal_contig_dispatch_table; - using impl::equal_output_id_table; - using impl::equal_strided_dispatch_table; - - auto equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, equal_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - equal_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - equal_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle 
operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto equal_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - equal_output_id_table); - }; - m.def("_equal", equal_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_equal_result_type", equal_result_type_pyapi, ""); - } - - // U13: ==== EXP (x) - { - impl::populate_exp_dispatch_vectors(); - using impl::exp_contig_dispatch_vector; - using impl::exp_output_typeid_vector; - using impl::exp_strided_dispatch_vector; - - auto exp_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, exp_output_typeid_vector, - exp_contig_dispatch_vector, exp_strided_dispatch_vector); - }; - m.def("_exp", exp_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto exp_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, exp_output_typeid_vector); - }; - m.def("_exp_result_type", exp_result_type_pyapi); - } - - // U14: ==== EXPM1 (x) - { - impl::populate_expm1_dispatch_vectors(); - using impl::expm1_contig_dispatch_vector; - using impl::expm1_output_typeid_vector; - using impl::expm1_strided_dispatch_vector; - - auto expm1_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, expm1_output_typeid_vector, - expm1_contig_dispatch_vector, expm1_strided_dispatch_vector); - }; - m.def("_expm1", expm1_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto expm1_result_type_pyapi = [&](const 
py::dtype dtype) { - return py_unary_ufunc_result_type(dtype, - expm1_output_typeid_vector); - }; - m.def("_expm1_result_type", expm1_result_type_pyapi); - } - - // U15: ==== FLOOR (x) - { - impl::populate_floor_dispatch_vectors(); - using impl::floor_contig_dispatch_vector; - using impl::floor_output_typeid_vector; - using impl::floor_strided_dispatch_vector; - - auto floor_pyapi = [&](const arrayT &src, const arrayT &dst, + auto acosh_pyapi = [&](const arrayT &src, const arrayT &dst, sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, floor_output_typeid_vector, - floor_contig_dispatch_vector, floor_strided_dispatch_vector); + src, dst, exec_q, depends, acosh_output_typeid_vector, + acosh_contig_dispatch_vector, acosh_strided_dispatch_vector); }; - m.def("_floor", floor_pyapi, "", py::arg("src"), py::arg("dst"), + m.def("_acosh", acosh_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto floor_result_type_pyapi = [&](const py::dtype &dtype) { + auto acosh_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, - floor_output_typeid_vector); - }; - m.def("_floor_result_type", floor_result_type_pyapi); - } - - // B10: ==== FLOOR_DIVIDE (x1, x2) - { - impl::populate_floor_divide_dispatch_tables(); - using impl::floor_divide_contig_dispatch_table; - using impl::floor_divide_output_id_table; - using impl::floor_divide_strided_dispatch_table; - - auto floor_divide_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, floor_divide_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - floor_divide_contig_dispatch_table, - // function pointers to handle operation on 
strided arrays (most - // general case) - floor_divide_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto floor_divide_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - floor_divide_output_id_table); - }; - m.def("_floor_divide", floor_divide_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_floor_divide_result_type", floor_divide_result_type_pyapi, ""); - - using impl::floor_divide_inplace_contig_dispatch_table; - using impl::floor_divide_inplace_strided_dispatch_table; - - auto floor_divide_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, floor_divide_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - floor_divide_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - floor_divide_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_floor_divide_inplace", floor_divide_inplace_pyapi, "", - py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // B11: ==== 
GREATER (x1, x2) - { - impl::populate_greater_dispatch_tables(); - using impl::greater_contig_dispatch_table; - using impl::greater_output_id_table; - using impl::greater_strided_dispatch_table; - - auto greater_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, greater_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - greater_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - greater_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto greater_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - greater_output_id_table); + acosh_output_typeid_vector); }; - m.def("_greater", greater_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_greater_result_type", greater_result_type_pyapi, ""); + m.def("_acosh_result_type", acosh_result_type_pyapi); } - // B12: ==== GREATER_EQUAL (x1, x2) + // B01: ===== ADD (x1, x2) { - impl::populate_greater_equal_dispatch_tables(); - using impl::greater_equal_contig_dispatch_table; - using impl::greater_equal_output_id_table; - using impl::greater_equal_strided_dispatch_table; + impl::populate_add_dispatch_tables(); + using impl::add_contig_dispatch_table; + using 
impl::add_contig_matrix_contig_row_broadcast_dispatch_table; + using impl::add_contig_row_contig_matrix_broadcast_dispatch_table; + using impl::add_output_id_table; + using impl::add_strided_dispatch_table; - auto greater_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { + auto add_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, greater_equal_output_id_table, + src1, src2, dst, exec_q, depends, add_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - greater_equal_contig_dispatch_table, + add_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - greater_equal_strided_dispatch_table, + add_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + add_contig_matrix_contig_row_broadcast_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + add_contig_row_contig_matrix_broadcast_dispatch_table); }; - auto greater_equal_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { + auto add_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, - greater_equal_output_id_table); + add_output_id_table); }; - m.def("_greater_equal", greater_equal_pyapi, "", py::arg("src1"), - py::arg("src2"), 
py::arg("dst"), py::arg("sycl_queue"), + m.def("_add", add_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_add_result_type", add_result_type_pyapi, ""); + + using impl::add_inplace_contig_dispatch_table; + using impl::add_inplace_row_matrix_dispatch_table; + using impl::add_inplace_strided_dispatch_table; + + auto add_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, add_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + add_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + add_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + add_inplace_row_matrix_dispatch_table); + }; + m.def("_add_inplace", add_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_greater_equal_result_type", greater_equal_result_type_pyapi, - ""); } - // U16: ==== IMAG (x) + // U04: ===== ASIN (x) { - impl::populate_imag_dispatch_vectors(); - using impl::imag_contig_dispatch_vector; - using impl::imag_output_typeid_vector; - using impl::imag_strided_dispatch_vector; + impl::populate_asin_dispatch_vectors(); + using impl::asin_contig_dispatch_vector; + using impl::asin_output_typeid_vector; + using impl::asin_strided_dispatch_vector; - auto imag_pyapi = [&](const arrayT &src, const arrayT &dst, + auto asin_pyapi = [&](const arrayT &src, const arrayT &dst, sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, imag_output_typeid_vector, - imag_contig_dispatch_vector, 
imag_strided_dispatch_vector); + src, dst, exec_q, depends, asin_output_typeid_vector, + asin_contig_dispatch_vector, asin_strided_dispatch_vector); }; - m.def("_imag", imag_pyapi, "", py::arg("src"), py::arg("dst"), + m.def("_asin", asin_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto imag_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, imag_output_typeid_vector); - }; - m.def("_imag_result_type", imag_result_type_pyapi); - } - - // U17: ==== ISFINITE (x) - { - impl::populate_isfinite_dispatch_vectors(); - - using impl::isfinite_contig_dispatch_vector; - using impl::isfinite_output_typeid_vector; - using impl::isfinite_strided_dispatch_vector; - auto isfinite_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, - isfinite_output_typeid_vector, - isfinite_contig_dispatch_vector, - isfinite_strided_dispatch_vector); - }; - auto isfinite_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - isfinite_output_typeid_vector); + auto asin_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, asin_output_typeid_vector); }; - m.def("_isfinite", isfinite_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_isfinite_result_type", isfinite_result_type_pyapi, ""); + m.def("_asin_result_type", asin_result_type_pyapi); } - // U18: ==== ISINF (x) + // U05: ===== ASINH (x) { - impl::populate_isinf_dispatch_vectors(); + impl::populate_asinh_dispatch_vectors(); + using impl::asinh_contig_dispatch_vector; + using impl::asinh_output_typeid_vector; + using impl::asinh_strided_dispatch_vector; - using impl::isinf_contig_dispatch_vector; - using impl::isinf_output_typeid_vector; - using 
impl::isinf_strided_dispatch_vector; - auto isinf_pyapi = [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, + auto asinh_pyapi = [&](const arrayT &src, const arrayT &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { + const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, isinf_output_typeid_vector, - isinf_contig_dispatch_vector, isinf_strided_dispatch_vector); + src, dst, exec_q, depends, asinh_output_typeid_vector, + asinh_contig_dispatch_vector, asinh_strided_dispatch_vector); }; - auto isinf_result_type_pyapi = [&](const py::dtype &dtype) { + m.def("_asinh", asinh_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto asinh_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, - isinf_output_typeid_vector); + asinh_output_typeid_vector); }; - m.def("_isinf", isinf_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_isinf_result_type", isinf_result_type_pyapi, ""); + m.def("_asinh_result_type", asinh_result_type_pyapi); } - // U19: ==== ISNAN (x) + // U06: ===== ATAN (x) { - impl::populate_isnan_dispatch_vectors(); + impl::populate_atan_dispatch_vectors(); + using impl::atan_contig_dispatch_vector; + using impl::atan_output_typeid_vector; + using impl::atan_strided_dispatch_vector; - using impl::isnan_contig_dispatch_vector; - using impl::isnan_output_typeid_vector; - using impl::isnan_strided_dispatch_vector; - auto isnan_pyapi = [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { + auto atan_pyapi = [&](arrayT src, arrayT dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, isnan_output_typeid_vector, - isnan_contig_dispatch_vector, isnan_strided_dispatch_vector); - }; - auto 
isnan_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - isnan_output_typeid_vector); + src, dst, exec_q, depends, atan_output_typeid_vector, + atan_contig_dispatch_vector, atan_strided_dispatch_vector); }; - m.def("_isnan", isnan_pyapi, "", py::arg("src"), py::arg("dst"), + m.def("_atan", atan_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_isnan_result_type", isnan_result_type_pyapi, ""); - } - // B13: ==== LESS (x1, x2) - { - impl::populate_less_dispatch_tables(); - using impl::less_contig_dispatch_table; - using impl::less_output_id_table; - using impl::less_strided_dispatch_table; - - auto less_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, less_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - less_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - less_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto less_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - less_output_id_table); + auto atan_result_type_pyapi = [&](py::dtype dtype) { + return py_unary_ufunc_result_type(dtype, atan_output_typeid_vector); }; - m.def("_less", less_pyapi, "", py::arg("src1"), 
py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_less_result_type", less_result_type_pyapi, ""); + m.def("_atan_result_type", atan_result_type_pyapi); } - // B14: ==== LESS_EQUAL (x1, x2) + // B02: ===== ATAN2 (x1, x2) { - impl::populate_less_equal_dispatch_tables(); - using impl::less_equal_contig_dispatch_table; - using impl::less_equal_output_id_table; - using impl::less_equal_strided_dispatch_table; + impl::populate_atan2_dispatch_tables(); + using impl::atan2_contig_dispatch_table; + using impl::atan2_output_id_table; + using impl::atan2_strided_dispatch_table; - auto less_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { + auto atan2_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, less_equal_output_id_table, + src1, src2, dst, exec_q, depends, atan2_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - less_equal_contig_dispatch_table, + atan2_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - less_equal_strided_dispatch_table, + atan2_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) td_ns::NullPtrTable< @@ -4316,132 +1880,62 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto less_equal_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { + auto atan2_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return 
py_binary_ufunc_result_type(dtype1, dtype2, - less_equal_output_id_table); + atan2_output_id_table); }; - m.def("_less_equal", less_equal_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + m.def("_atan2", atan2_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_less_equal_result_type", less_equal_result_type_pyapi, ""); - } - - // U20: ==== LOG (x) - { - impl::populate_log_dispatch_vectors(); - using impl::log_contig_dispatch_vector; - using impl::log_output_typeid_vector; - using impl::log_strided_dispatch_vector; - - auto log_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, log_output_typeid_vector, - log_contig_dispatch_vector, log_strided_dispatch_vector); - }; - m.def("_log", log_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto log_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, log_output_typeid_vector); - }; - m.def("_log_result_type", log_result_type_pyapi); + m.def("_atan2_result_type", atan2_result_type_pyapi, ""); } - // U21: ==== LOG1P (x) + // U07: ===== ATANH (x) { - impl::populate_log1p_dispatch_vectors(); - using impl::log1p_contig_dispatch_vector; - using impl::log1p_output_typeid_vector; - using impl::log1p_strided_dispatch_vector; + impl::populate_atanh_dispatch_vectors(); + using impl::atanh_contig_dispatch_vector; + using impl::atanh_output_typeid_vector; + using impl::atanh_strided_dispatch_vector; - auto log1p_pyapi = [&](const arrayT &src, const arrayT &dst, + auto atanh_pyapi = [&](const arrayT &src, const arrayT &dst, sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, log1p_output_typeid_vector, - log1p_contig_dispatch_vector, 
log1p_strided_dispatch_vector); + src, dst, exec_q, depends, atanh_output_typeid_vector, + atanh_contig_dispatch_vector, atanh_strided_dispatch_vector); }; - m.def("_log1p", log1p_pyapi, "", py::arg("src"), py::arg("dst"), + m.def("_atanh", atanh_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto log1p_result_type_pyapi = [&](const py::dtype &dtype) { + auto atanh_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, - log1p_output_typeid_vector); - }; - m.def("_log1p_result_type", log1p_result_type_pyapi); - } - - // U22: ==== LOG2 (x) - { - impl::populate_log2_dispatch_vectors(); - - using impl::log2_contig_dispatch_vector; - using impl::log2_output_typeid_vector; - using impl::log2_strided_dispatch_vector; - auto log2_pyapi = [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, log2_output_typeid_vector, - log2_contig_dispatch_vector, log2_strided_dispatch_vector); - }; - auto log2_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, log2_output_typeid_vector); + atanh_output_typeid_vector); }; - m.def("_log2", log2_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_log2_result_type", log2_result_type_pyapi, ""); + m.def("_atanh_result_type", atanh_result_type_pyapi); } - // U23: ==== LOG10 (x) + // B03: ===== BITWISE_AND (x1, x2) { - impl::populate_log10_dispatch_vectors(); - - using impl::log10_contig_dispatch_vector; - using impl::log10_output_typeid_vector; - using impl::log10_strided_dispatch_vector; - auto log10_pyapi = [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, 
log10_output_typeid_vector, - log10_contig_dispatch_vector, log10_strided_dispatch_vector); - }; - auto log10_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - log10_output_typeid_vector); - }; - m.def("_log10", log10_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_log10_result_type", log10_result_type_pyapi, ""); - } + impl::populate_bitwise_and_dispatch_tables(); + using impl::bitwise_and_contig_dispatch_table; + using impl::bitwise_and_output_id_table; + using impl::bitwise_and_strided_dispatch_table; - // B15: ==== LOGADDEXP (x1, x2) - { - impl::populate_logaddexp_dispatch_tables(); - using impl::logaddexp_contig_dispatch_table; - using impl::logaddexp_output_id_table; - using impl::logaddexp_strided_dispatch_table; - - auto logaddexp_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { + auto bitwise_and_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, logaddexp_output_id_table, + src1, src2, dst, exec_q, depends, bitwise_and_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - logaddexp_contig_dispatch_table, + bitwise_and_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - logaddexp_strided_dispatch_table, + bitwise_and_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) td_ns::NullPtrTable< @@ -4451,38 +1945,67 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< 
binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto logaddexp_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { + auto bitwise_and_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, - logaddexp_output_id_table); + bitwise_and_output_id_table); }; - m.def("_logaddexp", logaddexp_pyapi, "", py::arg("src1"), + m.def("_bitwise_and", bitwise_and_pyapi, "", py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_logaddexp_result_type", logaddexp_result_type_pyapi, ""); + m.def("_bitwise_and_result_type", bitwise_and_result_type_pyapi, ""); + + using impl::bitwise_and_inplace_contig_dispatch_table; + using impl::bitwise_and_inplace_strided_dispatch_table; + + auto bitwise_and_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, bitwise_and_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + bitwise_and_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + bitwise_and_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_bitwise_and_inplace", bitwise_and_inplace_pyapi, "", + py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); } - // B16: ==== LOGICAL_AND (x1, x2) + // B04: ===== BITWISE_LEFT_SHIFT (x1, x2) { - impl::populate_logical_and_dispatch_tables(); - using impl::logical_and_contig_dispatch_table; - using 
impl::logical_and_output_id_table; - using impl::logical_and_strided_dispatch_table; + impl::populate_bitwise_left_shift_dispatch_tables(); + using impl::bitwise_left_shift_contig_dispatch_table; + using impl::bitwise_left_shift_output_id_table; + using impl::bitwise_left_shift_strided_dispatch_table; - auto logical_and_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { + auto bitwise_left_shift_pyapi = [&](const dpctl::tensor::usm_ndarray + &src1, + const dpctl::tensor::usm_ndarray + &src2, + const dpctl::tensor::usm_ndarray + &dst, + sycl::queue &exec_q, + const std::vector + &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, logical_and_output_id_table, + src1, src2, dst, exec_q, depends, + bitwise_left_shift_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - logical_and_contig_dispatch_table, + bitwise_left_shift_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - logical_and_strided_dispatch_table, + bitwise_left_shift_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) td_ns::NullPtrTable< @@ -4492,64 +2015,91 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto logical_and_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - logical_and_output_id_table); - }; - m.def("_logical_and", logical_and_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + auto bitwise_left_shift_result_type_pyapi = + [&](const py::dtype &dtype1, const py::dtype &dtype2) { + return py_binary_ufunc_result_type( + 
dtype1, dtype2, bitwise_left_shift_output_id_table); + }; + m.def("_bitwise_left_shift", bitwise_left_shift_pyapi, "", + py::arg("src1"), py::arg("src2"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_bitwise_left_shift_result_type", + bitwise_left_shift_result_type_pyapi, ""); + + using impl::bitwise_left_shift_inplace_contig_dispatch_table; + using impl::bitwise_left_shift_inplace_strided_dispatch_table; + + auto bitwise_left_shift_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, + bitwise_left_shift_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + bitwise_left_shift_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + bitwise_left_shift_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_bitwise_left_shift_inplace", bitwise_left_shift_inplace_pyapi, + "", py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_logical_and_result_type", logical_and_result_type_pyapi, ""); } - // U24: ==== LOGICAL_NOT (x) + // U08: ===== BITWISE_INVERT (x) { - impl::populate_logical_not_dispatch_vectors(); - using impl::logical_not_contig_dispatch_vector; - using impl::logical_not_output_typeid_vector; - using impl::logical_not_strided_dispatch_vector; + impl::populate_bitwise_invert_dispatch_vectors(); + using impl::bitwise_invert_contig_dispatch_vector; + using impl::bitwise_invert_output_typeid_vector; + using impl::bitwise_invert_strided_dispatch_vector; - auto 
logical_not_pyapi = [&](const arrayT &src, arrayT dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { + auto bitwise_invert_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { return py_unary_ufunc(src, dst, exec_q, depends, - logical_not_output_typeid_vector, - logical_not_contig_dispatch_vector, - logical_not_strided_dispatch_vector); + bitwise_invert_output_typeid_vector, + bitwise_invert_contig_dispatch_vector, + bitwise_invert_strided_dispatch_vector); }; - m.def("_logical_not", logical_not_pyapi, "", py::arg("src"), + m.def("_bitwise_invert", bitwise_invert_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto logical_not_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - logical_not_output_typeid_vector); + auto bitwise_invert_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type( + dtype, bitwise_invert_output_typeid_vector); }; - m.def("_logical_not_result_type", logical_not_result_type_pyapi); + m.def("_bitwise_invert_result_type", bitwise_invert_result_type_pyapi); } - // B17: ==== LOGICAL_OR (x1, x2) + // B05: ===== BITWISE_OR (x1, x2) { - impl::populate_logical_or_dispatch_tables(); - using impl::logical_or_contig_dispatch_table; - using impl::logical_or_output_id_table; - using impl::logical_or_strided_dispatch_table; + impl::populate_bitwise_or_dispatch_tables(); + using impl::bitwise_or_contig_dispatch_table; + using impl::bitwise_or_output_id_table; + using impl::bitwise_or_strided_dispatch_table; - auto logical_or_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + auto bitwise_or_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, logical_or_output_id_table, 
+ src1, src2, dst, exec_q, depends, bitwise_or_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - logical_or_contig_dispatch_table, + bitwise_or_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - logical_or_strided_dispatch_table, + bitwise_or_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) td_ns::NullPtrTable< @@ -4559,38 +2109,67 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto logical_or_result_type_pyapi = [&](const py::dtype &dtype1, + auto bitwise_or_result_type_pyapi = [&](const py::dtype &dtype1, const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, - logical_or_output_id_table); + bitwise_or_output_id_table); }; - m.def("_logical_or", logical_or_pyapi, "", py::arg("src1"), + m.def("_bitwise_or", bitwise_or_pyapi, "", py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_logical_or_result_type", logical_or_result_type_pyapi, ""); + m.def("_bitwise_or_result_type", bitwise_or_result_type_pyapi, ""); + + using impl::bitwise_or_inplace_contig_dispatch_table; + using impl::bitwise_or_inplace_strided_dispatch_table; + + auto bitwise_or_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, bitwise_or_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + bitwise_or_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + bitwise_or_inplace_strided_dispatch_table, + // function 
pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_bitwise_or_inplace", bitwise_or_inplace_pyapi, "", + py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); } - // B18: ==== LOGICAL_XOR (x1, x2) + // B06: ===== BITWISE_RIGHT_SHIFT (x1, x2) { - impl::populate_logical_xor_dispatch_tables(); - using impl::logical_xor_contig_dispatch_table; - using impl::logical_xor_output_id_table; - using impl::logical_xor_strided_dispatch_table; + impl::populate_bitwise_right_shift_dispatch_tables(); + using impl::bitwise_right_shift_contig_dispatch_table; + using impl::bitwise_right_shift_output_id_table; + using impl::bitwise_right_shift_strided_dispatch_table; - auto logical_xor_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { + auto bitwise_right_shift_pyapi = [&](const dpctl::tensor::usm_ndarray + &src1, + const dpctl::tensor::usm_ndarray + &src2, + const dpctl::tensor::usm_ndarray + &dst, + sycl::queue &exec_q, + const std::vector + &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, logical_xor_output_id_table, + src1, src2, dst, exec_q, depends, + bitwise_right_shift_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - logical_xor_contig_dispatch_table, + bitwise_right_shift_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - logical_xor_strided_dispatch_table, + bitwise_right_shift_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) td_ns::NullPtrTable< @@ -4600,385 +2179,403 @@ void 
init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto logical_xor_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - logical_xor_output_id_table); - }; - m.def("_logical_xor", logical_xor_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_logical_xor_result_type", logical_xor_result_type_pyapi, ""); - } + auto bitwise_right_shift_result_type_pyapi = + [&](const py::dtype &dtype1, const py::dtype &dtype2) { + return py_binary_ufunc_result_type( + dtype1, dtype2, bitwise_right_shift_output_id_table); + }; + m.def("_bitwise_right_shift", bitwise_right_shift_pyapi, "", + py::arg("src1"), py::arg("src2"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_bitwise_right_shift_result_type", + bitwise_right_shift_result_type_pyapi, ""); - // B??: ==== MAXIMUM (x1, x2) - { - impl::populate_maximum_dispatch_tables(); - using impl::maximum_contig_dispatch_table; - using impl::maximum_output_id_table; - using impl::maximum_strided_dispatch_table; + using impl::bitwise_right_shift_inplace_contig_dispatch_table; + using impl::bitwise_right_shift_inplace_strided_dispatch_table; - auto maximum_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, maximum_output_id_table, - // function pointers to handle operation on contiguous - // arrays (pointers may be nullptr) - maximum_contig_dispatch_table, - // function pointers to handle operation on strided arrays - // (most general case) - maximum_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix - // and c-contig row with 
broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto maximum_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - maximum_output_id_table); - }; - m.def("_maximum", maximum_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), + auto bitwise_right_shift_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, + bitwise_right_shift_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + bitwise_right_shift_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + bitwise_right_shift_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_bitwise_right_shift_inplace", bitwise_right_shift_inplace_pyapi, + "", py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_maximum_result_type", maximum_result_type_pyapi, ""); } - // B??: ==== MINIMUM (x1, x2) + // B07: ===== BITWISE_XOR (x1, x2) { - impl::populate_minimum_dispatch_tables(); - using impl::minimum_contig_dispatch_table; - using impl::minimum_output_id_table; - using impl::minimum_strided_dispatch_table; + impl::populate_bitwise_xor_dispatch_tables(); + using 
impl::bitwise_xor_contig_dispatch_table; + using impl::bitwise_xor_output_id_table; + using impl::bitwise_xor_strided_dispatch_table; - auto minimum_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { + auto bitwise_xor_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, minimum_output_id_table, - // function pointers to handle operation on contiguous - // arrays (pointers may be nullptr) - minimum_contig_dispatch_table, - // function pointers to handle operation on strided arrays - // (most general case) - minimum_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) + src1, src2, dst, exec_q, depends, bitwise_xor_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + bitwise_xor_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + bitwise_xor_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) td_ns::NullPtrTable< binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto minimum_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - minimum_output_id_table); - }; - m.def("_minimum", minimum_pyapi, "", py::arg("src1"), py::arg("src2"), - 
py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_minimum_result_type", minimum_result_type_pyapi, ""); - } - - // B19: ==== MULTIPLY (x1, x2) - { - impl::populate_multiply_dispatch_tables(); - using impl::multiply_contig_dispatch_table; - using impl::multiply_contig_matrix_contig_row_broadcast_dispatch_table; - using impl::multiply_contig_row_contig_matrix_broadcast_dispatch_table; - using impl::multiply_output_id_table; - using impl::multiply_strided_dispatch_table; - - auto multiply_pyapi = - [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, multiply_output_id_table, - // function pointers to handle operation on contiguous - // arrays (pointers may be nullptr) - multiply_contig_dispatch_table, - // function pointers to handle operation on strided arrays - // (most general case) - multiply_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) - multiply_contig_matrix_contig_row_broadcast_dispatch_table, - // function pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) - multiply_contig_row_contig_matrix_broadcast_dispatch_table); - }; - auto multiply_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { + }; + auto bitwise_xor_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, - multiply_output_id_table); + bitwise_xor_output_id_table); }; - m.def("_multiply", multiply_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), + m.def("_bitwise_xor", bitwise_xor_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = 
py::list()); - m.def("_multiply_result_type", multiply_result_type_pyapi, ""); + m.def("_bitwise_xor_result_type", bitwise_xor_result_type_pyapi, ""); - using impl::multiply_inplace_contig_dispatch_table; - using impl::multiply_inplace_row_matrix_dispatch_table; - using impl::multiply_inplace_strided_dispatch_table; + using impl::bitwise_xor_inplace_contig_dispatch_table; + using impl::bitwise_xor_inplace_strided_dispatch_table; - auto multiply_inplace_pyapi = + auto bitwise_xor_inplace_pyapi = [&](const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_inplace_ufunc( - src, dst, exec_q, depends, multiply_output_id_table, + src, dst, exec_q, depends, bitwise_xor_output_id_table, // function pointers to handle inplace operation on // contiguous arrays (pointers may be nullptr) - multiply_inplace_contig_dispatch_table, + bitwise_xor_inplace_contig_dispatch_table, // function pointers to handle inplace operation on strided // arrays (most general case) - multiply_inplace_strided_dispatch_table, + bitwise_xor_inplace_strided_dispatch_table, // function pointers to handle inplace operation on // c-contig matrix with c-contig row with broadcasting // (may be nullptr) - multiply_inplace_row_matrix_dispatch_table); + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); }; - m.def("_multiply_inplace", multiply_inplace_pyapi, "", py::arg("lhs"), - py::arg("rhs"), py::arg("sycl_queue"), + m.def("_bitwise_xor_inplace", bitwise_xor_inplace_pyapi, "", + py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), py::arg("depends") = py::list()); } - // U25: ==== NEGATIVE (x) + // U09: ==== CEIL (x) { - impl::populate_negative_dispatch_vectors(); - using impl::negative_contig_dispatch_vector; - using impl::negative_output_typeid_vector; - using impl::negative_strided_dispatch_vector; + impl::populate_ceil_dispatch_vectors(); + using impl::ceil_contig_dispatch_vector; 
+ using impl::ceil_output_typeid_vector; + using impl::ceil_strided_dispatch_vector; - auto negative_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, - negative_output_typeid_vector, - negative_contig_dispatch_vector, - negative_strided_dispatch_vector); + auto ceil_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, ceil_output_typeid_vector, + ceil_contig_dispatch_vector, ceil_strided_dispatch_vector); }; - m.def("_negative", negative_pyapi, "", py::arg("src"), py::arg("dst"), + m.def("_ceil", ceil_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto negative_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - negative_output_typeid_vector); + auto ceil_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, ceil_output_typeid_vector); }; - m.def("_negative_result_type", negative_result_type_pyapi); + m.def("_ceil_result_type", ceil_result_type_pyapi); } - // B20: ==== NOT_EQUAL (x1, x2) + // U10: ==== CONJ (x) { - impl::populate_not_equal_dispatch_tables(); - using impl::not_equal_contig_dispatch_table; - using impl::not_equal_output_id_table; - using impl::not_equal_strided_dispatch_table; - - auto not_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, not_equal_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - not_equal_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - 
not_equal_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + impl::populate_conj_dispatch_vectors(); + using impl::conj_contig_dispatch_vector; + using impl::conj_output_typeid_vector; + using impl::conj_strided_dispatch_vector; + + auto conj_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, conj_output_typeid_vector, + conj_contig_dispatch_vector, conj_strided_dispatch_vector); }; - auto not_equal_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - not_equal_output_id_table); + m.def("_conj", conj_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto conj_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, conj_output_typeid_vector); }; - m.def("_not_equal", not_equal_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_not_equal_result_type", not_equal_result_type_pyapi, ""); + m.def("_conj_result_type", conj_result_type_pyapi); } - // U26: ==== POSITIVE (x) + // U11: ==== COS (x) { - impl::populate_positive_dispatch_vectors(); - using impl::positive_contig_dispatch_vector; - using impl::positive_output_typeid_vector; - using impl::positive_strided_dispatch_vector; + impl::populate_cos_dispatch_vectors(); + using impl::cos_contig_dispatch_vector; + using impl::cos_output_typeid_vector; + using 
impl::cos_strided_dispatch_vector; - auto positive_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, - positive_output_typeid_vector, - positive_contig_dispatch_vector, - positive_strided_dispatch_vector); + auto cos_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, cos_output_typeid_vector, + cos_contig_dispatch_vector, cos_strided_dispatch_vector); }; - m.def("_positive", positive_pyapi, "", py::arg("src"), py::arg("dst"), + m.def("_cos", cos_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto positive_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - positive_output_typeid_vector); + auto cos_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, cos_output_typeid_vector); }; - m.def("_positive_result_type", positive_result_type_pyapi); + m.def("_cos_result_type", cos_result_type_pyapi); } - // B21: ==== POW (x1, x2) + // U12: ==== COSH (x) { - impl::populate_pow_dispatch_tables(); - using impl::pow_contig_dispatch_table; - using impl::pow_output_id_table; - using impl::pow_strided_dispatch_table; + impl::populate_cosh_dispatch_vectors(); + using impl::cosh_contig_dispatch_vector; + using impl::cosh_output_typeid_vector; + using impl::cosh_strided_dispatch_vector; - auto pow_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { + auto cosh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, cosh_output_typeid_vector, + cosh_contig_dispatch_vector, 
cosh_strided_dispatch_vector); + }; + m.def("_cosh", cosh_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto cosh_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, cosh_output_typeid_vector); + }; + m.def("_cosh_result_type", cosh_result_type_pyapi); + } + + // B08: ==== DIVIDE (x1, x2) + { + impl::populate_true_divide_dispatch_tables(); + using impl::true_divide_contig_dispatch_table; + using impl:: + true_divide_contig_matrix_contig_row_broadcast_dispatch_table; + using impl:: + true_divide_contig_row_contig_matrix_broadcast_dispatch_table; + using impl::true_divide_output_id_table; + using impl::true_divide_strided_dispatch_table; + + auto divide_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, pow_output_id_table, + src1, src2, dst, exec_q, depends, true_divide_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - pow_contig_dispatch_table, + true_divide_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - pow_strided_dispatch_table, + true_divide_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + true_divide_contig_matrix_contig_row_broadcast_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + true_divide_contig_row_contig_matrix_broadcast_dispatch_table); }; - auto pow_result_type_pyapi = [&](const py::dtype 
&dtype1, - const py::dtype &dtype2) { + auto divide_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, - pow_output_id_table); + true_divide_output_id_table); }; - m.def("_pow", pow_pyapi, "", py::arg("src1"), py::arg("src2"), + m.def("_divide", divide_pyapi, "", py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_pow_result_type", pow_result_type_pyapi, ""); + m.def("_divide_result_type", divide_result_type_pyapi, ""); - using impl::pow_inplace_contig_dispatch_table; - using impl::pow_inplace_strided_dispatch_table; + using impl::true_divide_inplace_contig_dispatch_table; + using impl::true_divide_inplace_output_id_table; + using impl::true_divide_inplace_row_matrix_dispatch_table; + using impl::true_divide_inplace_strided_dispatch_table; - auto pow_inplace_pyapi = + auto divide_inplace_pyapi = [&](const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_inplace_ufunc( - src, dst, exec_q, depends, pow_output_id_table, + src, dst, exec_q, depends, + true_divide_inplace_output_id_table, // function pointers to handle inplace operation on // contiguous arrays (pointers may be nullptr) - pow_inplace_contig_dispatch_table, + true_divide_inplace_contig_dispatch_table, // function pointers to handle inplace operation on strided // arrays (most general case) - pow_inplace_strided_dispatch_table, + true_divide_inplace_strided_dispatch_table, // function pointers to handle inplace operation on // c-contig matrix with c-contig row with broadcasting // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + true_divide_inplace_row_matrix_dispatch_table); }; - m.def("_pow_inplace", pow_inplace_pyapi, "", py::arg("lhs"), + m.def("_divide_inplace", divide_inplace_pyapi, "", py::arg("lhs"), 
py::arg("rhs"), py::arg("sycl_queue"), py::arg("depends") = py::list()); } - // U??: ==== PROJ (x) + // B09: ==== EQUAL (x1, x2) { - impl::populate_proj_dispatch_vectors(); - using impl::proj_contig_dispatch_vector; - using impl::proj_output_typeid_vector; - using impl::proj_strided_dispatch_vector; + impl::populate_equal_dispatch_tables(); + using impl::equal_contig_dispatch_table; + using impl::equal_output_id_table; + using impl::equal_strided_dispatch_table; - auto proj_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { + auto equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, equal_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + equal_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + equal_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + equal_output_id_table); + }; + m.def("_equal", equal_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_equal_result_type", equal_result_type_pyapi, ""); + } + + // U13: ==== EXP (x) + { + impl::populate_exp_dispatch_vectors(); + 
using impl::exp_contig_dispatch_vector; + using impl::exp_output_typeid_vector; + using impl::exp_strided_dispatch_vector; + + auto exp_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, proj_output_typeid_vector, - proj_contig_dispatch_vector, proj_strided_dispatch_vector); + src, dst, exec_q, depends, exp_output_typeid_vector, + exp_contig_dispatch_vector, exp_strided_dispatch_vector); }; - m.def("_proj", proj_pyapi, "", py::arg("src"), py::arg("dst"), + m.def("_exp", exp_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto proj_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, proj_output_typeid_vector); + auto exp_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, exp_output_typeid_vector); }; - m.def("_proj_result_type", proj_result_type_pyapi); + m.def("_exp_result_type", exp_result_type_pyapi); } - // U27: ==== REAL (x) + // U14: ==== EXPM1 (x) { - impl::populate_real_dispatch_vectors(); - using impl::real_contig_dispatch_vector; - using impl::real_output_typeid_vector; - using impl::real_strided_dispatch_vector; + impl::populate_expm1_dispatch_vectors(); + using impl::expm1_contig_dispatch_vector; + using impl::expm1_output_typeid_vector; + using impl::expm1_strided_dispatch_vector; - auto real_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { + auto expm1_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, real_output_typeid_vector, - real_contig_dispatch_vector, real_strided_dispatch_vector); + src, dst, exec_q, depends, expm1_output_typeid_vector, + expm1_contig_dispatch_vector, expm1_strided_dispatch_vector); + }; + m.def("_expm1", expm1_pyapi, 
"", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto expm1_result_type_pyapi = [&](const py::dtype dtype) { + return py_unary_ufunc_result_type(dtype, + expm1_output_typeid_vector); + }; + m.def("_expm1_result_type", expm1_result_type_pyapi); + } + + // U15: ==== FLOOR (x) + { + impl::populate_floor_dispatch_vectors(); + using impl::floor_contig_dispatch_vector; + using impl::floor_output_typeid_vector; + using impl::floor_strided_dispatch_vector; + + auto floor_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, floor_output_typeid_vector, + floor_contig_dispatch_vector, floor_strided_dispatch_vector); }; - m.def("_real", real_pyapi, "", py::arg("src"), py::arg("dst"), + m.def("_floor", floor_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto real_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, real_output_typeid_vector); + auto floor_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + floor_output_typeid_vector); }; - m.def("_real_result_type", real_result_type_pyapi); + m.def("_floor_result_type", floor_result_type_pyapi); } - // B22: ==== REMAINDER (x1, x2) + // B10: ==== FLOOR_DIVIDE (x1, x2) { - impl::populate_remainder_dispatch_tables(); - using impl::remainder_contig_dispatch_table; - using impl::remainder_output_id_table; - using impl::remainder_strided_dispatch_table; - - auto remainder_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { + impl::populate_floor_divide_dispatch_tables(); + using impl::floor_divide_contig_dispatch_table; + using impl::floor_divide_output_id_table; + using 
impl::floor_divide_strided_dispatch_table; + + auto floor_divide_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, remainder_output_id_table, + src1, src2, dst, exec_q, depends, floor_divide_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - remainder_contig_dispatch_table, + floor_divide_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - remainder_strided_dispatch_table, + floor_divide_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) td_ns::NullPtrTable< @@ -4988,339 +2585,218 @@ void init_elementwise_functions(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto remainder_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { + auto floor_divide_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, - remainder_output_id_table); + floor_divide_output_id_table); }; - m.def("_remainder", remainder_pyapi, "", py::arg("src1"), + m.def("_floor_divide", floor_divide_pyapi, "", py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_remainder_result_type", remainder_result_type_pyapi, ""); + m.def("_floor_divide_result_type", floor_divide_result_type_pyapi, ""); - using impl::remainder_inplace_contig_dispatch_table; - using impl::remainder_inplace_strided_dispatch_table; + using impl::floor_divide_inplace_contig_dispatch_table; + using impl::floor_divide_inplace_strided_dispatch_table; - auto remainder_inplace_pyapi = + auto floor_divide_inplace_pyapi = 
[&](const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_inplace_ufunc( - src, dst, exec_q, depends, remainder_output_id_table, + src, dst, exec_q, depends, floor_divide_output_id_table, // function pointers to handle inplace operation on // contiguous arrays (pointers may be nullptr) - remainder_inplace_contig_dispatch_table, + floor_divide_inplace_contig_dispatch_table, // function pointers to handle inplace operation on strided // arrays (most general case) - remainder_inplace_strided_dispatch_table, + floor_divide_inplace_strided_dispatch_table, // function pointers to handle inplace operation on // c-contig matrix with c-contig row with broadcasting // (may be nullptr) td_ns::NullPtrTable< binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); }; - m.def("_remainder_inplace", remainder_inplace_pyapi, "", py::arg("lhs"), - py::arg("rhs"), py::arg("sycl_queue"), + m.def("_floor_divide_inplace", floor_divide_inplace_pyapi, "", + py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), py::arg("depends") = py::list()); } - // U28: ==== ROUND (x) - { - impl::populate_round_dispatch_vectors(); - using impl::round_contig_dispatch_vector; - using impl::round_output_typeid_vector; - using impl::round_strided_dispatch_vector; - - auto round_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, round_output_typeid_vector, - round_contig_dispatch_vector, round_strided_dispatch_vector); - }; - m.def("_round", round_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto round_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - round_output_typeid_vector); - }; - m.def("_round_result_type", round_result_type_pyapi); - } - - // U29: ==== SIGN (x) - { - 
impl::populate_sign_dispatch_vectors(); - using impl::sign_contig_dispatch_vector; - using impl::sign_output_typeid_vector; - using impl::sign_strided_dispatch_vector; - - auto sign_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, sign_output_typeid_vector, - sign_contig_dispatch_vector, sign_strided_dispatch_vector); - }; - m.def("_sign", sign_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto sign_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, sign_output_typeid_vector); - }; - m.def("_sign_result_type", sign_result_type_pyapi); - } - - // ==== SIGNBIT (x) + // B11: ==== GREATER (x1, x2) { - impl::populate_signbit_dispatch_vectors(); - using impl::signbit_contig_dispatch_vector; - using impl::signbit_output_typeid_vector; - using impl::signbit_strided_dispatch_vector; + impl::populate_greater_dispatch_tables(); + using impl::greater_contig_dispatch_table; + using impl::greater_output_id_table; + using impl::greater_strided_dispatch_table; - auto signbit_pyapi = [&](const arrayT &src, const arrayT &dst, + auto greater_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, - signbit_output_typeid_vector, - signbit_contig_dispatch_vector, - signbit_strided_dispatch_vector); - }; - m.def("_signbit", signbit_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto signbit_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - signbit_output_typeid_vector); - }; - m.def("_signbit_result_type", signbit_result_type_pyapi); - } - - // U30: ==== SIN (x) - { - 
impl::populate_sin_dispatch_vectors(); - using impl::sin_contig_dispatch_vector; - using impl::sin_output_typeid_vector; - using impl::sin_strided_dispatch_vector; - - auto sin_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, sin_output_typeid_vector, - sin_contig_dispatch_vector, sin_strided_dispatch_vector); - }; - m.def("_sin", sin_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto sin_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, sin_output_typeid_vector); - }; - m.def("_sin_result_type", sin_result_type_pyapi); - } - // U31: ==== SINH (x) - { - impl::populate_sinh_dispatch_vectors(); - using impl::sinh_contig_dispatch_vector; - using impl::sinh_output_typeid_vector; - using impl::sinh_strided_dispatch_vector; - - auto sinh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, sinh_output_typeid_vector, - sinh_contig_dispatch_vector, sinh_strided_dispatch_vector); + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, greater_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + greater_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + greater_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - 
m.def("_sinh", sinh_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto sinh_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, sinh_output_typeid_vector); + auto greater_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + greater_output_id_table); }; - m.def("_sinh_result_type", sinh_result_type_pyapi); + m.def("_greater", greater_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_greater_result_type", greater_result_type_pyapi, ""); } - // U32: ==== SQUARE (x) + // B12: ==== GREATER_EQUAL (x1, x2) { - impl::populate_square_dispatch_vectors(); - using impl::square_contig_dispatch_vector; - using impl::square_output_typeid_vector; - using impl::square_strided_dispatch_vector; + impl::populate_greater_equal_dispatch_tables(); + using impl::greater_equal_contig_dispatch_table; + using impl::greater_equal_output_id_table; + using impl::greater_equal_strided_dispatch_table; - auto square_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, square_output_typeid_vector, - square_contig_dispatch_vector, square_strided_dispatch_vector); + auto greater_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, greater_equal_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + greater_equal_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + greater_equal_strided_dispatch_table, + // 
function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - m.def("_square", square_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto square_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - square_output_typeid_vector); + auto greater_equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + greater_equal_output_id_table); }; - m.def("_square_result_type", square_result_type_pyapi); + m.def("_greater_equal", greater_equal_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_greater_equal_result_type", greater_equal_result_type_pyapi, + ""); } - // U33: ==== SQRT (x) + // U16: ==== IMAG (x) { - impl::populate_sqrt_dispatch_vectors(); - using impl::sqrt_contig_dispatch_vector; - using impl::sqrt_output_typeid_vector; - using impl::sqrt_strided_dispatch_vector; + impl::populate_imag_dispatch_vectors(); + using impl::imag_contig_dispatch_vector; + using impl::imag_output_typeid_vector; + using impl::imag_strided_dispatch_vector; - auto sqrt_pyapi = [&](const arrayT &src, const arrayT &dst, + auto imag_pyapi = [&](const arrayT &src, const arrayT &dst, sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, sqrt_output_typeid_vector, - sqrt_contig_dispatch_vector, sqrt_strided_dispatch_vector); + src, dst, exec_q, depends, imag_output_typeid_vector, + imag_contig_dispatch_vector, imag_strided_dispatch_vector); }; - 
m.def("_sqrt", sqrt_pyapi, "", py::arg("src"), py::arg("dst"), + m.def("_imag", imag_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto sqrt_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, sqrt_output_typeid_vector); + auto imag_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, imag_output_typeid_vector); }; - m.def("_sqrt_result_type", sqrt_result_type_pyapi); + m.def("_imag_result_type", imag_result_type_pyapi); } - // B23: ==== SUBTRACT (x1, x2) + // U17: ==== ISFINITE (x) { - impl::populate_subtract_dispatch_tables(); - using impl::subtract_contig_dispatch_table; - using impl::subtract_contig_matrix_contig_row_broadcast_dispatch_table; - using impl::subtract_contig_row_contig_matrix_broadcast_dispatch_table; - using impl::subtract_output_id_table; - using impl::subtract_strided_dispatch_table; - - auto subtract_pyapi = - [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, subtract_output_id_table, - // function pointers to handle operation on contiguous - // arrays (pointers may be nullptr) - subtract_contig_dispatch_table, - // function pointers to handle operation on strided arrays - // (most general case) - subtract_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) - subtract_contig_matrix_contig_row_broadcast_dispatch_table, - // function pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) - subtract_contig_row_contig_matrix_broadcast_dispatch_table); - }; - auto subtract_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return 
py_binary_ufunc_result_type(dtype1, dtype2, - subtract_output_id_table); - }; - m.def("_subtract", subtract_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_subtract_result_type", subtract_result_type_pyapi, ""); - - using impl::subtract_inplace_contig_dispatch_table; - using impl::subtract_inplace_row_matrix_dispatch_table; - using impl::subtract_inplace_strided_dispatch_table; + impl::populate_isfinite_dispatch_vectors(); - auto subtract_inplace_pyapi = + using impl::isfinite_contig_dispatch_vector; + using impl::isfinite_output_typeid_vector; + using impl::isfinite_strided_dispatch_vector; + auto isfinite_pyapi = [&](const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, subtract_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - subtract_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - subtract_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - subtract_inplace_row_matrix_dispatch_table); + return py_unary_ufunc(src, dst, exec_q, depends, + isfinite_output_typeid_vector, + isfinite_contig_dispatch_vector, + isfinite_strided_dispatch_vector); }; - m.def("_subtract_inplace", subtract_inplace_pyapi, "", py::arg("lhs"), - py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // U34: ==== TAN (x) - { - impl::populate_tan_dispatch_vectors(); - using impl::tan_contig_dispatch_vector; - using impl::tan_output_typeid_vector; - using impl::tan_strided_dispatch_vector; - - auto tan_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const 
event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, tan_output_typeid_vector, - tan_contig_dispatch_vector, tan_strided_dispatch_vector); + auto isfinite_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + isfinite_output_typeid_vector); }; - m.def("_tan", tan_pyapi, "", py::arg("src"), py::arg("dst"), + m.def("_isfinite", isfinite_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto tan_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, tan_output_typeid_vector); - }; - m.def("_tan_result_type", tan_result_type_pyapi); + m.def("_isfinite_result_type", isfinite_result_type_pyapi, ""); } - // U35: ==== TANH (x) + // U18: ==== ISINF (x) { - impl::populate_tanh_dispatch_vectors(); - using impl::tanh_contig_dispatch_vector; - using impl::tanh_output_typeid_vector; - using impl::tanh_strided_dispatch_vector; + impl::populate_isinf_dispatch_vectors(); - auto tanh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { + using impl::isinf_contig_dispatch_vector; + using impl::isinf_output_typeid_vector; + using impl::isinf_strided_dispatch_vector; + auto isinf_pyapi = [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, tanh_output_typeid_vector, - tanh_contig_dispatch_vector, tanh_strided_dispatch_vector); + src, dst, exec_q, depends, isinf_output_typeid_vector, + isinf_contig_dispatch_vector, isinf_strided_dispatch_vector); }; - m.def("_tanh", tanh_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto tanh_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, tanh_output_typeid_vector); + auto 
isinf_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + isinf_output_typeid_vector); }; - m.def("_tanh_result_type", tanh_result_type_pyapi); + m.def("_isinf", isinf_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_isinf_result_type", isinf_result_type_pyapi, ""); } - // U36: ==== TRUNC (x) + // U19: ==== ISNAN (x) { - impl::populate_trunc_dispatch_vectors(); - using impl::trunc_contig_dispatch_vector; - using impl::trunc_output_typeid_vector; - using impl::trunc_strided_dispatch_vector; + impl::populate_isnan_dispatch_vectors(); - auto trunc_pyapi = [&](const arrayT &src, const arrayT &dst, + using impl::isnan_contig_dispatch_vector; + using impl::isnan_output_typeid_vector; + using impl::isnan_strided_dispatch_vector; + auto isnan_pyapi = [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const event_vecT &depends = {}) { + const std::vector &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, trunc_output_typeid_vector, - trunc_contig_dispatch_vector, trunc_strided_dispatch_vector); + src, dst, exec_q, depends, isnan_output_typeid_vector, + isnan_contig_dispatch_vector, isnan_strided_dispatch_vector); }; - m.def("_trunc", trunc_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto trunc_result_type_pyapi = [&](const py::dtype &dtype) { + auto isnan_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, - trunc_output_typeid_vector); + isnan_output_typeid_vector); }; - m.def("_trunc_result_type", trunc_result_type_pyapi); + m.def("_isnan", isnan_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_isnan_result_type", isnan_result_type_pyapi, ""); } // B24: ==== HYPOT (x1, x2) @@ -5449,30 +2925,6 @@ void 
init_elementwise_functions(py::module_ m) }; m.def("_exp2_result_type", exp2_result_type_pyapi); } - - // U39: ==== RSQRT (x) - { - impl::populate_rsqrt_dispatch_vectors(); - using impl::rsqrt_contig_dispatch_vector; - using impl::rsqrt_output_typeid_vector; - using impl::rsqrt_strided_dispatch_vector; - - auto rsqrt_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, rsqrt_output_typeid_vector, - rsqrt_contig_dispatch_vector, rsqrt_strided_dispatch_vector); - }; - m.def("_rsqrt", rsqrt_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto rsqrt_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - rsqrt_output_typeid_vector); - }; - m.def("_rsqrt_result_type", rsqrt_result_type_pyapi); - } } } // namespace py_internal diff --git a/dpctl/tensor/libtensor/source/elementwise_functions2.hpp b/dpctl/tensor/libtensor/source/elementwise_functions2.hpp new file mode 100644 index 0000000000..b712ddd136 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions2.hpp @@ -0,0 +1,40 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_elementwise_functions2(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp index 0e8b4236b6..ad17a71c9a 100644 --- a/dpctl/tensor/libtensor/source/tensor_py.cpp +++ b/dpctl/tensor/libtensor/source/tensor_py.cpp @@ -42,7 +42,8 @@ #include "copy_for_roll.hpp" #include "copy_numpy_ndarray_into_usm_ndarray.hpp" #include "device_support_queries.hpp" -#include "elementwise_functions.hpp" +#include "elementwise_functions1.hpp" +#include "elementwise_functions2.hpp" #include "eye_ctor.hpp" #include "full_ctor.hpp" #include "integer_advanced_indexing.hpp" @@ -441,7 +442,8 @@ PYBIND11_MODULE(_tensor_impl, m) py::arg("reps"), py::arg("axis"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - dpctl::tensor::py_internal::init_elementwise_functions(m); + dpctl::tensor::py_internal::init_elementwise_functions1(m); + dpctl::tensor::py_internal::init_elementwise_functions2(m); dpctl::tensor::py_internal::init_boolean_reduction_functions(m); dpctl::tensor::py_internal::init_reduction_functions(m); } From 8094f6d1c3745df7fc5f5001a16ad5d2e71be2d4 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 21 Oct 2023 20:32:47 -0500 Subject: [PATCH 09/19] Added missing includes of common_inplace --- .../include/kernels/elementwise_functions/floor_divide.hpp | 1 + .../libtensor/include/kernels/elementwise_functions/pow.hpp | 1 + 2 files changed, 2 insertions(+) diff --git 
a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp index 025d7e8bc4..241c0e7ca8 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp @@ -34,6 +34,7 @@ #include "utils/type_utils.hpp" #include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" #include namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp index b52939148b..6654bae384 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp @@ -35,6 +35,7 @@ #include "utils/type_utils.hpp" #include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" #include namespace dpctl From c2ed9a7208fb6c4bf094c14fd36b9b55066f2bcd Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 21 Oct 2023 20:33:51 -0500 Subject: [PATCH 10/19] Split elementwise functions into two more files and added them to the build --- dpctl/tensor/CMakeLists.txt | 6 +- .../source/elementwise_functions1.cpp | 1987 +++----------- .../source/elementwise_functions2.cpp | 2292 +++-------------- .../source/elementwise_functions3.cpp | 1407 ++++++++++ .../source/elementwise_functions3.hpp | 40 + .../source/elementwise_functions4.cpp | 1551 +++++++++++ .../source/elementwise_functions4.hpp | 40 + dpctl/tensor/libtensor/source/tensor_py.cpp | 4 + 8 files changed, 3780 insertions(+), 3547 deletions(-) create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions3.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions3.hpp create mode 100644 
dpctl/tensor/libtensor/source/elementwise_functions4.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions4.hpp diff --git a/dpctl/tensor/CMakeLists.txt b/dpctl/tensor/CMakeLists.txt index 81702fa486..fa23fd237e 100644 --- a/dpctl/tensor/CMakeLists.txt +++ b/dpctl/tensor/CMakeLists.txt @@ -49,6 +49,8 @@ set(_tensor_impl_sources ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/device_support_queries.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions2.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions3.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions4.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/repeat.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/reduction_over_axis.cpp ) @@ -66,7 +68,9 @@ set(_no_fast_math_sources ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/linear_sequences.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions2.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/reduction_over_axis.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions3.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions4.cpp +${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/reduction_over_axis.cpp ) foreach(_src_fn ${_no_fast_math_sources}) get_source_file_property(_cmpl_options_prop ${_src_fn} COMPILE_OPTIONS) diff --git a/dpctl/tensor/libtensor/source/elementwise_functions1.cpp b/dpctl/tensor/libtensor/source/elementwise_functions1.cpp index 7de986856c..305655230d 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions1.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions1.cpp @@ -49,24 +49,6 @@ #include "kernels/elementwise_functions/minimum.hpp" #include "kernels/elementwise_functions/multiply.hpp" #include "kernels/elementwise_functions/negative.hpp" -#include 
"kernels/elementwise_functions/not_equal.hpp" -#include "kernels/elementwise_functions/positive.hpp" -#include "kernels/elementwise_functions/pow.hpp" -#include "kernels/elementwise_functions/proj.hpp" -#include "kernels/elementwise_functions/real.hpp" -#include "kernels/elementwise_functions/remainder.hpp" -#include "kernels/elementwise_functions/round.hpp" -#include "kernels/elementwise_functions/rsqrt.hpp" -#include "kernels/elementwise_functions/sign.hpp" -#include "kernels/elementwise_functions/signbit.hpp" -#include "kernels/elementwise_functions/sin.hpp" -#include "kernels/elementwise_functions/sinh.hpp" -#include "kernels/elementwise_functions/sqrt.hpp" -#include "kernels/elementwise_functions/square.hpp" -#include "kernels/elementwise_functions/subtract.hpp" -#include "kernels/elementwise_functions/tan.hpp" -#include "kernels/elementwise_functions/tanh.hpp" -#include "kernels/elementwise_functions/trunc.hpp" namespace dpctl { @@ -709,1004 +691,36 @@ void populate_negative_dispatch_vectors(void) } // namespace impl -// B20: ==== NOT_EQUAL (x1, x2) -namespace impl -{ -namespace not_equal_fn_ns = dpctl::tensor::kernels::not_equal; - -static binary_contig_impl_fn_ptr_t - not_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int not_equal_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - not_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_not_equal_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = not_equal_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::NotEqualTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(not_equal_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::NotEqualStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(not_equal_strided_dispatch_table); - - // function pointers for 
operation on contiguous inputs and output - using fn_ns::NotEqualContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(not_equal_contig_dispatch_table); -}; -} // namespace impl - -// U26: ==== POSITIVE (x) -namespace impl -{ - -namespace positive_fn_ns = dpctl::tensor::kernels::positive; - -static unary_contig_impl_fn_ptr_t - positive_contig_dispatch_vector[td_ns::num_types]; -static int positive_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - positive_strided_dispatch_vector[td_ns::num_types]; - -void populate_positive_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = positive_fn_ns; - - using fn_ns::PositiveContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(positive_contig_dispatch_vector); - - using fn_ns::PositiveStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(positive_strided_dispatch_vector); - - using fn_ns::PositiveTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(positive_output_typeid_vector); -} - -} // namespace impl - -// B21: ==== POW (x1, x2) -namespace impl -{ - -namespace pow_fn_ns = dpctl::tensor::kernels::pow; - -static binary_contig_impl_fn_ptr_t pow_contig_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static int pow_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - pow_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - pow_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - pow_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_pow_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = pow_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::PowTypeMapFactory; - DispatchTableBuilder dtb1; - 
dtb1.populate_dispatch_table(pow_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::PowStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(pow_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::PowContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(pow_contig_dispatch_table); - - // function pointers for inplace operation on general strided arrays - using fn_ns::PowInplaceStridedFactory; - DispatchTableBuilder - dtb4; - dtb4.populate_dispatch_table(pow_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::PowInplaceContigFactory; - DispatchTableBuilder - dtb5; - dtb5.populate_dispatch_table(pow_inplace_contig_dispatch_table); -}; - -} // namespace impl - -// U??: ==== PROJ (x) -namespace impl -{ - -namespace proj_fn_ns = dpctl::tensor::kernels::proj; - -static unary_contig_impl_fn_ptr_t proj_contig_dispatch_vector[td_ns::num_types]; -static int proj_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - proj_strided_dispatch_vector[td_ns::num_types]; - -void populate_proj_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = proj_fn_ns; - - using fn_ns::ProjContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(proj_contig_dispatch_vector); - - using fn_ns::ProjStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(proj_strided_dispatch_vector); - - using fn_ns::ProjTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(proj_output_typeid_vector); -} -} // namespace impl - -// U27: ==== REAL (x) -namespace impl -{ - -namespace real_fn_ns = dpctl::tensor::kernels::real; - -static unary_contig_impl_fn_ptr_t real_contig_dispatch_vector[td_ns::num_types]; -static int real_output_typeid_vector[td_ns::num_types]; -static 
unary_strided_impl_fn_ptr_t - real_strided_dispatch_vector[td_ns::num_types]; - -void populate_real_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = real_fn_ns; - - using fn_ns::RealContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(real_contig_dispatch_vector); - - using fn_ns::RealStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(real_strided_dispatch_vector); - - using fn_ns::RealTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(real_output_typeid_vector); -} -} // namespace impl - -// B22: ==== REMAINDER (x1, x2) -namespace impl -{ - -namespace remainder_fn_ns = dpctl::tensor::kernels::remainder; - -static binary_contig_impl_fn_ptr_t - remainder_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int remainder_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - remainder_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - remainder_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - remainder_inplace_strided_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -void populate_remainder_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = remainder_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::RemainderTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(remainder_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::RemainderStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(remainder_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::RemainderContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(remainder_contig_dispatch_table); - - // 
function pointers for inplace operation on general strided arrays - using fn_ns::RemainderInplaceStridedFactory; - DispatchTableBuilder - dtb4; - dtb4.populate_dispatch_table(remainder_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::RemainderInplaceContigFactory; - DispatchTableBuilder - dtb5; - dtb5.populate_dispatch_table(remainder_inplace_contig_dispatch_table); -} - -} // namespace impl - -// U28: ==== ROUND (x) -namespace impl -{ - -namespace round_fn_ns = dpctl::tensor::kernels::round; - -static unary_contig_impl_fn_ptr_t - round_contig_dispatch_vector[td_ns::num_types]; -static int round_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - round_strided_dispatch_vector[td_ns::num_types]; - -void populate_round_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = round_fn_ns; - - using fn_ns::RoundContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(round_contig_dispatch_vector); - - using fn_ns::RoundStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(round_strided_dispatch_vector); - - using fn_ns::RoundTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(round_output_typeid_vector); -} - -} // namespace impl - -// U29: ==== SIGN (x) -namespace impl -{ - -namespace sign_fn_ns = dpctl::tensor::kernels::sign; - -static unary_contig_impl_fn_ptr_t sign_contig_dispatch_vector[td_ns::num_types]; -static int sign_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - sign_strided_dispatch_vector[td_ns::num_types]; - -void populate_sign_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = sign_fn_ns; - - using fn_ns::SignContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(sign_contig_dispatch_vector); - - using fn_ns::SignStridedFactory; - DispatchVectorBuilder - dvb2; - 
dvb2.populate_dispatch_vector(sign_strided_dispatch_vector); - - using fn_ns::SignTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(sign_output_typeid_vector); -} - -} // namespace impl - -// ==== SIGNBIT (x) -namespace impl -{ - -namespace signbit_fn_ns = dpctl::tensor::kernels::signbit; - -static unary_contig_impl_fn_ptr_t - signbit_contig_dispatch_vector[td_ns::num_types]; -static int signbit_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - signbit_strided_dispatch_vector[td_ns::num_types]; - -void populate_signbit_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = signbit_fn_ns; - - using fn_ns::SignbitContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(signbit_contig_dispatch_vector); - - using fn_ns::SignbitStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(signbit_strided_dispatch_vector); - - using fn_ns::SignbitTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(signbit_output_typeid_vector); -} - -} // namespace impl - -// U30: ==== SIN (x) -namespace impl -{ - -namespace sin_fn_ns = dpctl::tensor::kernels::sin; - -static unary_contig_impl_fn_ptr_t sin_contig_dispatch_vector[td_ns::num_types]; -static int sin_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - sin_strided_dispatch_vector[td_ns::num_types]; - -void populate_sin_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = sin_fn_ns; - - using fn_ns::SinContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(sin_contig_dispatch_vector); - - using fn_ns::SinStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(sin_strided_dispatch_vector); - - using fn_ns::SinTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(sin_output_typeid_vector); -} - -} // namespace impl - -// U31: ==== SINH (x) -namespace impl -{ - -namespace 
sinh_fn_ns = dpctl::tensor::kernels::sinh; - -static unary_contig_impl_fn_ptr_t sinh_contig_dispatch_vector[td_ns::num_types]; -static int sinh_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - sinh_strided_dispatch_vector[td_ns::num_types]; - -void populate_sinh_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = sinh_fn_ns; - - using fn_ns::SinhContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(sinh_contig_dispatch_vector); - - using fn_ns::SinhStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(sinh_strided_dispatch_vector); - - using fn_ns::SinhTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(sinh_output_typeid_vector); -} - -} // namespace impl - -// U32: ==== SQUARE (x) -namespace impl -{ - -namespace square_fn_ns = dpctl::tensor::kernels::square; - -static unary_contig_impl_fn_ptr_t - square_contig_dispatch_vector[td_ns::num_types]; -static int square_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - square_strided_dispatch_vector[td_ns::num_types]; - -void populate_square_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = square_fn_ns; - - using fn_ns::SquareContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(square_contig_dispatch_vector); - - using fn_ns::SquareStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(square_strided_dispatch_vector); - - using fn_ns::SquareTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(square_output_typeid_vector); -} - -} // namespace impl - -// U33: ==== SQRT (x) -namespace impl -{ - -namespace sqrt_fn_ns = dpctl::tensor::kernels::sqrt; - -static unary_contig_impl_fn_ptr_t sqrt_contig_dispatch_vector[td_ns::num_types]; -static int sqrt_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - sqrt_strided_dispatch_vector[td_ns::num_types]; - 
-void populate_sqrt_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = sqrt_fn_ns; - - using fn_ns::SqrtContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(sqrt_contig_dispatch_vector); - - using fn_ns::SqrtStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(sqrt_strided_dispatch_vector); - - using fn_ns::SqrtTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(sqrt_output_typeid_vector); -} - -} // namespace impl - -// B23: ==== SUBTRACT (x1, x2) -namespace impl -{ -namespace subtract_fn_ns = dpctl::tensor::kernels::subtract; - -static binary_contig_impl_fn_ptr_t - subtract_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int subtract_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - subtract_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -// sub(matrix, row) -static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t - subtract_contig_matrix_contig_row_broadcast_dispatch_table - [td_ns::num_types][td_ns::num_types]; - -// sub(row, matrix) -static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t - subtract_contig_row_contig_matrix_broadcast_dispatch_table - [td_ns::num_types][td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - subtract_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - subtract_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; -static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t - subtract_inplace_row_matrix_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -void populate_subtract_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = subtract_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::SubtractTypeMapFactory; - DispatchTableBuilder dtb1; - 
dtb1.populate_dispatch_table(subtract_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::SubtractStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(subtract_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::SubtractContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(subtract_contig_dispatch_table); - - // function pointers for operation on contiguous matrix, contiguous row - // with contiguous matrix output - using fn_ns::SubtractContigMatrixContigRowBroadcastFactory; - DispatchTableBuilder< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, - SubtractContigMatrixContigRowBroadcastFactory, num_types> - dtb4; - dtb4.populate_dispatch_table( - subtract_contig_matrix_contig_row_broadcast_dispatch_table); - - // function pointers for operation on contiguous row, contiguous matrix - // with contiguous matrix output - using fn_ns::SubtractContigRowContigMatrixBroadcastFactory; - DispatchTableBuilder< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, - SubtractContigRowContigMatrixBroadcastFactory, num_types> - dtb5; - dtb5.populate_dispatch_table( - subtract_contig_row_contig_matrix_broadcast_dispatch_table); - - // function pointers for inplace operation on general strided arrays - using fn_ns::SubtractInplaceStridedFactory; - DispatchTableBuilder - dtb6; - dtb6.populate_dispatch_table(subtract_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::SubtractInplaceContigFactory; - DispatchTableBuilder - dtb7; - dtb7.populate_dispatch_table(subtract_inplace_contig_dispatch_table); - - // function pointers for inplace operation on contiguous matrix - // and contiguous row - using fn_ns::SubtractInplaceRowMatrixBroadcastFactory; - DispatchTableBuilder - dtb8; - 
dtb8.populate_dispatch_table(subtract_inplace_row_matrix_dispatch_table); -}; - -} // namespace impl - -// U34: ==== TAN (x) -namespace impl -{ - -namespace tan_fn_ns = dpctl::tensor::kernels::tan; - -static unary_contig_impl_fn_ptr_t tan_contig_dispatch_vector[td_ns::num_types]; -static int tan_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - tan_strided_dispatch_vector[td_ns::num_types]; - -void populate_tan_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = tan_fn_ns; - - using fn_ns::TanContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(tan_contig_dispatch_vector); - - using fn_ns::TanStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(tan_strided_dispatch_vector); - - using fn_ns::TanTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(tan_output_typeid_vector); -} - -} // namespace impl - -// U35: ==== TANH (x) -namespace impl -{ - -namespace tanh_fn_ns = dpctl::tensor::kernels::tanh; - -static unary_contig_impl_fn_ptr_t tanh_contig_dispatch_vector[td_ns::num_types]; -static int tanh_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - tanh_strided_dispatch_vector[td_ns::num_types]; - -void populate_tanh_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = tanh_fn_ns; - - using fn_ns::TanhContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(tanh_contig_dispatch_vector); - - using fn_ns::TanhStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(tanh_strided_dispatch_vector); - - using fn_ns::TanhTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(tanh_output_typeid_vector); -} - -} // namespace impl - -// U36: ==== TRUNC (x) -namespace impl -{ - -namespace trunc_fn_ns = dpctl::tensor::kernels::trunc; - -static unary_contig_impl_fn_ptr_t - trunc_contig_dispatch_vector[td_ns::num_types]; -static int 
trunc_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - trunc_strided_dispatch_vector[td_ns::num_types]; - -void populate_trunc_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = trunc_fn_ns; - - using fn_ns::TruncContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(trunc_contig_dispatch_vector); - - using fn_ns::TruncStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(trunc_strided_dispatch_vector); - - using fn_ns::TruncTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(trunc_output_typeid_vector); -} - -} // namespace impl - -// U39: ==== RSQRT (x) -namespace impl -{ - -namespace rsqrt_fn_ns = dpctl::tensor::kernels::rsqrt; - -static unary_contig_impl_fn_ptr_t - rsqrt_contig_dispatch_vector[td_ns::num_types]; -static int rsqrt_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - rsqrt_strided_dispatch_vector[td_ns::num_types]; - -void populate_rsqrt_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = rsqrt_fn_ns; - - using fn_ns::RsqrtContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(rsqrt_contig_dispatch_vector); - - using fn_ns::RsqrtStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(rsqrt_strided_dispatch_vector); - - using fn_ns::RsqrtTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(rsqrt_output_typeid_vector); -} - -} // namespace impl - -// ========================================================================================== -// // - -namespace py = pybind11; - -void init_elementwise_functions1(py::module_ m) -{ - - using arrayT = dpctl::tensor::usm_ndarray; - using event_vecT = std::vector; - - // B13: ==== LESS (x1, x2) - { - impl::populate_less_dispatch_tables(); - using impl::less_contig_dispatch_table; - using impl::less_output_id_table; - using impl::less_strided_dispatch_table; - - auto 
less_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, less_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - less_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - less_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto less_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - less_output_id_table); - }; - m.def("_less", less_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_less_result_type", less_result_type_pyapi, ""); - } - - // B14: ==== LESS_EQUAL (x1, x2) - { - impl::populate_less_equal_dispatch_tables(); - using impl::less_equal_contig_dispatch_table; - using impl::less_equal_output_id_table; - using impl::less_equal_strided_dispatch_table; - - auto less_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, less_equal_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - less_equal_contig_dispatch_table, - // function 
pointers to handle operation on strided arrays (most - // general case) - less_equal_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto less_equal_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - less_equal_output_id_table); - }; - m.def("_less_equal", less_equal_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_less_equal_result_type", less_equal_result_type_pyapi, ""); - } - - // U20: ==== LOG (x) - { - impl::populate_log_dispatch_vectors(); - using impl::log_contig_dispatch_vector; - using impl::log_output_typeid_vector; - using impl::log_strided_dispatch_vector; - - auto log_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, log_output_typeid_vector, - log_contig_dispatch_vector, log_strided_dispatch_vector); - }; - m.def("_log", log_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto log_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, log_output_typeid_vector); - }; - m.def("_log_result_type", log_result_type_pyapi); - } - - // U21: ==== LOG1P (x) - { - impl::populate_log1p_dispatch_vectors(); - using impl::log1p_contig_dispatch_vector; - using impl::log1p_output_typeid_vector; - using impl::log1p_strided_dispatch_vector; - - auto log1p_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue 
&exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, log1p_output_typeid_vector, - log1p_contig_dispatch_vector, log1p_strided_dispatch_vector); - }; - m.def("_log1p", log1p_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto log1p_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - log1p_output_typeid_vector); - }; - m.def("_log1p_result_type", log1p_result_type_pyapi); - } - - // U22: ==== LOG2 (x) - { - impl::populate_log2_dispatch_vectors(); - - using impl::log2_contig_dispatch_vector; - using impl::log2_output_typeid_vector; - using impl::log2_strided_dispatch_vector; - auto log2_pyapi = [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, log2_output_typeid_vector, - log2_contig_dispatch_vector, log2_strided_dispatch_vector); - }; - auto log2_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, log2_output_typeid_vector); - }; - m.def("_log2", log2_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_log2_result_type", log2_result_type_pyapi, ""); - } - - // U23: ==== LOG10 (x) - { - impl::populate_log10_dispatch_vectors(); - - using impl::log10_contig_dispatch_vector; - using impl::log10_output_typeid_vector; - using impl::log10_strided_dispatch_vector; - auto log10_pyapi = [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, log10_output_typeid_vector, - log10_contig_dispatch_vector, log10_strided_dispatch_vector); - }; - auto log10_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - 
log10_output_typeid_vector); - }; - m.def("_log10", log10_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_log10_result_type", log10_result_type_pyapi, ""); - } +// ========================================================================= // - // B15: ==== LOGADDEXP (x1, x2) - { - impl::populate_logaddexp_dispatch_tables(); - using impl::logaddexp_contig_dispatch_table; - using impl::logaddexp_output_id_table; - using impl::logaddexp_strided_dispatch_table; +namespace py = pybind11; - auto logaddexp_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, logaddexp_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - logaddexp_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - logaddexp_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto logaddexp_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - logaddexp_output_id_table); - }; - m.def("_logaddexp", logaddexp_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_logaddexp_result_type", logaddexp_result_type_pyapi, ""); - } +void init_elementwise_functions1(py::module_ m) +{ - // B16: ==== 
LOGICAL_AND (x1, x2) + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + // B13: ==== LESS (x1, x2) { - impl::populate_logical_and_dispatch_tables(); - using impl::logical_and_contig_dispatch_table; - using impl::logical_and_output_id_table; - using impl::logical_and_strided_dispatch_table; + impl::populate_less_dispatch_tables(); + using impl::less_contig_dispatch_table; + using impl::less_output_id_table; + using impl::less_strided_dispatch_table; - auto logical_and_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { + auto less_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, logical_and_output_id_table, + src1, src2, dst, exec_q, depends, less_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - logical_and_contig_dispatch_table, + less_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - logical_and_strided_dispatch_table, + less_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) td_ns::NullPtrTable< @@ -1716,64 +730,38 @@ void init_elementwise_functions1(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto logical_and_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { + auto less_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, - logical_and_output_id_table); - }; - m.def("_logical_and", logical_and_pyapi, "", py::arg("src1"), - 
py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_logical_and_result_type", logical_and_result_type_pyapi, ""); - } - - // U24: ==== LOGICAL_NOT (x) - { - impl::populate_logical_not_dispatch_vectors(); - using impl::logical_not_contig_dispatch_vector; - using impl::logical_not_output_typeid_vector; - using impl::logical_not_strided_dispatch_vector; - - auto logical_not_pyapi = [&](const arrayT &src, arrayT dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, - logical_not_output_typeid_vector, - logical_not_contig_dispatch_vector, - logical_not_strided_dispatch_vector); + less_output_id_table); }; - m.def("_logical_not", logical_not_pyapi, "", py::arg("src"), + m.def("_less", less_pyapi, "", py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto logical_not_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - logical_not_output_typeid_vector); - }; - m.def("_logical_not_result_type", logical_not_result_type_pyapi); + m.def("_less_result_type", less_result_type_pyapi, ""); } - // B17: ==== LOGICAL_OR (x1, x2) + // B14: ==== LESS_EQUAL (x1, x2) { - impl::populate_logical_or_dispatch_tables(); - using impl::logical_or_contig_dispatch_table; - using impl::logical_or_output_id_table; - using impl::logical_or_strided_dispatch_table; + impl::populate_less_equal_dispatch_tables(); + using impl::less_equal_contig_dispatch_table; + using impl::less_equal_output_id_table; + using impl::less_equal_strided_dispatch_table; - auto logical_or_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + auto less_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, 
logical_or_output_id_table, + src1, src2, dst, exec_q, depends, less_equal_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - logical_or_contig_dispatch_table, + less_equal_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - logical_or_strided_dispatch_table, + less_equal_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) td_ns::NullPtrTable< @@ -1783,249 +771,132 @@ void init_elementwise_functions1(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto logical_or_result_type_pyapi = [&](const py::dtype &dtype1, + auto less_equal_result_type_pyapi = [&](const py::dtype &dtype1, const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, - logical_or_output_id_table); - }; - m.def("_logical_or", logical_or_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_logical_or_result_type", logical_or_result_type_pyapi, ""); - } - - // B18: ==== LOGICAL_XOR (x1, x2) - { - impl::populate_logical_xor_dispatch_tables(); - using impl::logical_xor_contig_dispatch_table; - using impl::logical_xor_output_id_table; - using impl::logical_xor_strided_dispatch_table; - - auto logical_xor_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, logical_xor_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - logical_xor_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - logical_xor_strided_dispatch_table, - // function 
pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto logical_xor_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - logical_xor_output_id_table); + less_equal_output_id_table); }; - m.def("_logical_xor", logical_xor_pyapi, "", py::arg("src1"), + m.def("_less_equal", less_equal_pyapi, "", py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_logical_xor_result_type", logical_xor_result_type_pyapi, ""); - } - - // B??: ==== MAXIMUM (x1, x2) - { - impl::populate_maximum_dispatch_tables(); - using impl::maximum_contig_dispatch_table; - using impl::maximum_output_id_table; - using impl::maximum_strided_dispatch_table; - - auto maximum_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, maximum_output_id_table, - // function pointers to handle operation on contiguous - // arrays (pointers may be nullptr) - maximum_contig_dispatch_table, - // function pointers to handle operation on strided arrays - // (most general case) - maximum_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) - 
td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto maximum_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - maximum_output_id_table); - }; - m.def("_maximum", maximum_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_maximum_result_type", maximum_result_type_pyapi, ""); + m.def("_less_equal_result_type", less_equal_result_type_pyapi, ""); } - // B??: ==== MINIMUM (x1, x2) + // U20: ==== LOG (x) { - impl::populate_minimum_dispatch_tables(); - using impl::minimum_contig_dispatch_table; - using impl::minimum_output_id_table; - using impl::minimum_strided_dispatch_table; + impl::populate_log_dispatch_vectors(); + using impl::log_contig_dispatch_vector; + using impl::log_output_typeid_vector; + using impl::log_strided_dispatch_vector; - auto minimum_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, minimum_output_id_table, - // function pointers to handle operation on contiguous - // arrays (pointers may be nullptr) - minimum_contig_dispatch_table, - // function pointers to handle operation on strided arrays - // (most general case) - minimum_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + auto log_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const 
event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, log_output_typeid_vector, + log_contig_dispatch_vector, log_strided_dispatch_vector); }; - auto minimum_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - minimum_output_id_table); + m.def("_log", log_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto log_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, log_output_typeid_vector); }; - m.def("_minimum", minimum_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_minimum_result_type", minimum_result_type_pyapi, ""); + m.def("_log_result_type", log_result_type_pyapi); } - // B19: ==== MULTIPLY (x1, x2) + // U21: ==== LOG1P (x) { - impl::populate_multiply_dispatch_tables(); - using impl::multiply_contig_dispatch_table; - using impl::multiply_contig_matrix_contig_row_broadcast_dispatch_table; - using impl::multiply_contig_row_contig_matrix_broadcast_dispatch_table; - using impl::multiply_output_id_table; - using impl::multiply_strided_dispatch_table; + impl::populate_log1p_dispatch_vectors(); + using impl::log1p_contig_dispatch_vector; + using impl::log1p_output_typeid_vector; + using impl::log1p_strided_dispatch_vector; - auto multiply_pyapi = - [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, multiply_output_id_table, - // function pointers to handle operation on contiguous - // arrays (pointers may be nullptr) - multiply_contig_dispatch_table, - // function pointers to handle operation on strided arrays - // (most general case) - multiply_strided_dispatch_table, - // function 
pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) - multiply_contig_matrix_contig_row_broadcast_dispatch_table, - // function pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) - multiply_contig_row_contig_matrix_broadcast_dispatch_table); - }; - auto multiply_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - multiply_output_id_table); + auto log1p_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, log1p_output_typeid_vector, + log1p_contig_dispatch_vector, log1p_strided_dispatch_vector); }; - m.def("_multiply", multiply_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_multiply_result_type", multiply_result_type_pyapi, ""); - - using impl::multiply_inplace_contig_dispatch_table; - using impl::multiply_inplace_row_matrix_dispatch_table; - using impl::multiply_inplace_strided_dispatch_table; + m.def("_log1p", log1p_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto multiply_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, multiply_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - multiply_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - multiply_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - 
multiply_inplace_row_matrix_dispatch_table); - }; - m.def("_multiply_inplace", multiply_inplace_pyapi, "", py::arg("lhs"), - py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); + auto log1p_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + log1p_output_typeid_vector); + }; + m.def("_log1p_result_type", log1p_result_type_pyapi); } - // U25: ==== NEGATIVE (x) + // U22: ==== LOG2 (x) { - impl::populate_negative_dispatch_vectors(); - using impl::negative_contig_dispatch_vector; - using impl::negative_output_typeid_vector; - using impl::negative_strided_dispatch_vector; + impl::populate_log2_dispatch_vectors(); - auto negative_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, - negative_output_typeid_vector, - negative_contig_dispatch_vector, - negative_strided_dispatch_vector); + using impl::log2_contig_dispatch_vector; + using impl::log2_output_typeid_vector; + using impl::log2_strided_dispatch_vector; + auto log2_pyapi = [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, log2_output_typeid_vector, + log2_contig_dispatch_vector, log2_strided_dispatch_vector); }; - m.def("_negative", negative_pyapi, "", py::arg("src"), py::arg("dst"), + auto log2_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, log2_output_typeid_vector); + }; + m.def("_log2", log2_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_log2_result_type", log2_result_type_pyapi, ""); + } - auto negative_result_type_pyapi = [&](const py::dtype &dtype) { + // U23: ==== LOG10 (x) + { + impl::populate_log10_dispatch_vectors(); + + using impl::log10_contig_dispatch_vector; + using 
impl::log10_output_typeid_vector; + using impl::log10_strided_dispatch_vector; + auto log10_pyapi = [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, log10_output_typeid_vector, + log10_contig_dispatch_vector, log10_strided_dispatch_vector); + }; + auto log10_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, - negative_output_typeid_vector); + log10_output_typeid_vector); }; - m.def("_negative_result_type", negative_result_type_pyapi); + m.def("_log10", log10_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_log10_result_type", log10_result_type_pyapi, ""); } - // B20: ==== NOT_EQUAL (x1, x2) + // B15: ==== LOGADDEXP (x1, x2) { - impl::populate_not_equal_dispatch_tables(); - using impl::not_equal_contig_dispatch_table; - using impl::not_equal_output_id_table; - using impl::not_equal_strided_dispatch_table; + impl::populate_logaddexp_dispatch_tables(); + using impl::logaddexp_contig_dispatch_table; + using impl::logaddexp_output_id_table; + using impl::logaddexp_strided_dispatch_table; - auto not_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + auto logaddexp_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, not_equal_output_id_table, + src1, src2, dst, exec_q, depends, logaddexp_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - not_equal_contig_dispatch_table, + logaddexp_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - not_equal_strided_dispatch_table, + logaddexp_strided_dispatch_table, // 
function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) td_ns::NullPtrTable< @@ -2035,62 +906,38 @@ void init_elementwise_functions1(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto not_equal_result_type_pyapi = [&](const py::dtype &dtype1, + auto logaddexp_result_type_pyapi = [&](const py::dtype &dtype1, const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, - not_equal_output_id_table); + logaddexp_output_id_table); }; - m.def("_not_equal", not_equal_pyapi, "", py::arg("src1"), + m.def("_logaddexp", logaddexp_pyapi, "", py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_not_equal_result_type", not_equal_result_type_pyapi, ""); + m.def("_logaddexp_result_type", logaddexp_result_type_pyapi, ""); } - // U26: ==== POSITIVE (x) + // B16: ==== LOGICAL_AND (x1, x2) { - impl::populate_positive_dispatch_vectors(); - using impl::positive_contig_dispatch_vector; - using impl::positive_output_typeid_vector; - using impl::positive_strided_dispatch_vector; - - auto positive_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, - positive_output_typeid_vector, - positive_contig_dispatch_vector, - positive_strided_dispatch_vector); - }; - m.def("_positive", positive_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto positive_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - positive_output_typeid_vector); - }; - m.def("_positive_result_type", positive_result_type_pyapi); - } + impl::populate_logical_and_dispatch_tables(); + using impl::logical_and_contig_dispatch_table; + using impl::logical_and_output_id_table; + using impl::logical_and_strided_dispatch_table; - // B21: 
==== POW (x1, x2) - { - impl::populate_pow_dispatch_tables(); - using impl::pow_contig_dispatch_table; - using impl::pow_output_id_table; - using impl::pow_strided_dispatch_table; - - auto pow_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { + auto logical_and_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, pow_output_id_table, + src1, src2, dst, exec_q, depends, logical_and_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - pow_contig_dispatch_table, + logical_and_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - pow_strided_dispatch_table, + logical_and_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) td_ns::NullPtrTable< @@ -2100,109 +947,105 @@ void init_elementwise_functions1(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto pow_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { + auto logical_and_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, - pow_output_id_table); + logical_and_output_id_table); }; - m.def("_pow", pow_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_pow_result_type", pow_result_type_pyapi, ""); - - using impl::pow_inplace_contig_dispatch_table; - using impl::pow_inplace_strided_dispatch_table; - - auto pow_inplace_pyapi = - [&](const 
dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, pow_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - pow_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - pow_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_pow_inplace", pow_inplace_pyapi, "", py::arg("lhs"), - py::arg("rhs"), py::arg("sycl_queue"), + m.def("_logical_and", logical_and_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_logical_and_result_type", logical_and_result_type_pyapi, ""); } - // U??: ==== PROJ (x) + // U24: ==== LOGICAL_NOT (x) { - impl::populate_proj_dispatch_vectors(); - using impl::proj_contig_dispatch_vector; - using impl::proj_output_typeid_vector; - using impl::proj_strided_dispatch_vector; + impl::populate_logical_not_dispatch_vectors(); + using impl::logical_not_contig_dispatch_vector; + using impl::logical_not_output_typeid_vector; + using impl::logical_not_strided_dispatch_vector; - auto proj_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, proj_output_typeid_vector, - proj_contig_dispatch_vector, proj_strided_dispatch_vector); + auto logical_not_pyapi = [&](const arrayT &src, arrayT dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + logical_not_output_typeid_vector, + logical_not_contig_dispatch_vector, + 
logical_not_strided_dispatch_vector); }; - m.def("_proj", proj_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_logical_not", logical_not_pyapi, "", py::arg("src"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); - auto proj_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, proj_output_typeid_vector); + auto logical_not_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + logical_not_output_typeid_vector); }; - m.def("_proj_result_type", proj_result_type_pyapi); + m.def("_logical_not_result_type", logical_not_result_type_pyapi); } - // U27: ==== REAL (x) + // B17: ==== LOGICAL_OR (x1, x2) { - impl::populate_real_dispatch_vectors(); - using impl::real_contig_dispatch_vector; - using impl::real_output_typeid_vector; - using impl::real_strided_dispatch_vector; + impl::populate_logical_or_dispatch_tables(); + using impl::logical_or_contig_dispatch_table; + using impl::logical_or_output_id_table; + using impl::logical_or_strided_dispatch_table; - auto real_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, real_output_typeid_vector, - real_contig_dispatch_vector, real_strided_dispatch_vector); + auto logical_or_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, logical_or_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + logical_or_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + logical_or_strided_dispatch_table, + // function pointers to handle operation of 
c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - m.def("_real", real_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto real_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, real_output_typeid_vector); + auto logical_or_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + logical_or_output_id_table); }; - m.def("_real_result_type", real_result_type_pyapi); + m.def("_logical_or", logical_or_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_logical_or_result_type", logical_or_result_type_pyapi, ""); } - // B22: ==== REMAINDER (x1, x2) + // B18: ==== LOGICAL_XOR (x1, x2) { - impl::populate_remainder_dispatch_tables(); - using impl::remainder_contig_dispatch_table; - using impl::remainder_output_id_table; - using impl::remainder_strided_dispatch_table; + impl::populate_logical_xor_dispatch_tables(); + using impl::logical_xor_contig_dispatch_table; + using impl::logical_xor_output_id_table; + using impl::logical_xor_strided_dispatch_table; - auto remainder_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { + auto logical_xor_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { return py_binary_ufunc( - src1, 
src2, dst, exec_q, depends, remainder_output_id_table, + src1, src2, dst, exec_q, depends, logical_xor_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - remainder_contig_dispatch_table, + logical_xor_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - remainder_strided_dispatch_table, + logical_xor_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) td_ns::NullPtrTable< @@ -2212,363 +1055,185 @@ void init_elementwise_functions1(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto remainder_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { + auto logical_xor_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, - remainder_output_id_table); + logical_xor_output_id_table); }; - m.def("_remainder", remainder_pyapi, "", py::arg("src1"), + m.def("_logical_xor", logical_xor_pyapi, "", py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_remainder_result_type", remainder_result_type_pyapi, ""); - - using impl::remainder_inplace_contig_dispatch_table; - using impl::remainder_inplace_strided_dispatch_table; - - auto remainder_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, remainder_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - remainder_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - remainder_inplace_strided_dispatch_table, - // 
function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_remainder_inplace", remainder_inplace_pyapi, "", py::arg("lhs"), - py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // U28: ==== ROUND (x) - { - impl::populate_round_dispatch_vectors(); - using impl::round_contig_dispatch_vector; - using impl::round_output_typeid_vector; - using impl::round_strided_dispatch_vector; - - auto round_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, round_output_typeid_vector, - round_contig_dispatch_vector, round_strided_dispatch_vector); - }; - m.def("_round", round_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto round_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - round_output_typeid_vector); - }; - m.def("_round_result_type", round_result_type_pyapi); - } - - // U29: ==== SIGN (x) - { - impl::populate_sign_dispatch_vectors(); - using impl::sign_contig_dispatch_vector; - using impl::sign_output_typeid_vector; - using impl::sign_strided_dispatch_vector; - - auto sign_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, sign_output_typeid_vector, - sign_contig_dispatch_vector, sign_strided_dispatch_vector); - }; - m.def("_sign", sign_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto sign_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, sign_output_typeid_vector); - }; - m.def("_sign_result_type", sign_result_type_pyapi); + 
m.def("_logical_xor_result_type", logical_xor_result_type_pyapi, ""); } - // ==== SIGNBIT (x) + // B??: ==== MAXIMUM (x1, x2) { - impl::populate_signbit_dispatch_vectors(); - using impl::signbit_contig_dispatch_vector; - using impl::signbit_output_typeid_vector; - using impl::signbit_strided_dispatch_vector; + impl::populate_maximum_dispatch_tables(); + using impl::maximum_contig_dispatch_table; + using impl::maximum_output_id_table; + using impl::maximum_strided_dispatch_table; - auto signbit_pyapi = [&](const arrayT &src, const arrayT &dst, + auto maximum_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, - signbit_output_typeid_vector, - signbit_contig_dispatch_vector, - signbit_strided_dispatch_vector); + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, maximum_output_id_table, + // function pointers to handle operation on contiguous + // arrays (pointers may be nullptr) + maximum_contig_dispatch_table, + // function pointers to handle operation on strided arrays + // (most general case) + maximum_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - m.def("_signbit", signbit_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto signbit_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - signbit_output_typeid_vector); + auto maximum_result_type_pyapi 
= [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + maximum_output_id_table); }; - m.def("_signbit_result_type", signbit_result_type_pyapi); + m.def("_maximum", maximum_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_maximum_result_type", maximum_result_type_pyapi, ""); } - // U30: ==== SIN (x) - { - impl::populate_sin_dispatch_vectors(); - using impl::sin_contig_dispatch_vector; - using impl::sin_output_typeid_vector; - using impl::sin_strided_dispatch_vector; - - auto sin_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, sin_output_typeid_vector, - sin_contig_dispatch_vector, sin_strided_dispatch_vector); - }; - m.def("_sin", sin_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto sin_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, sin_output_typeid_vector); - }; - m.def("_sin_result_type", sin_result_type_pyapi); - } - // U31: ==== SINH (x) + // B??: ==== MINIMUM (x1, x2) { - impl::populate_sinh_dispatch_vectors(); - using impl::sinh_contig_dispatch_vector; - using impl::sinh_output_typeid_vector; - using impl::sinh_strided_dispatch_vector; - - auto sinh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, sinh_output_typeid_vector, - sinh_contig_dispatch_vector, sinh_strided_dispatch_vector); - }; - m.def("_sinh", sinh_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto sinh_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, sinh_output_typeid_vector); - }; - m.def("_sinh_result_type", 
sinh_result_type_pyapi); - } + impl::populate_minimum_dispatch_tables(); + using impl::minimum_contig_dispatch_table; + using impl::minimum_output_id_table; + using impl::minimum_strided_dispatch_table; - // U32: ==== SQUARE (x) - { - impl::populate_square_dispatch_vectors(); - using impl::square_contig_dispatch_vector; - using impl::square_output_typeid_vector; - using impl::square_strided_dispatch_vector; - - auto square_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, square_output_typeid_vector, - square_contig_dispatch_vector, square_strided_dispatch_vector); + auto minimum_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, minimum_output_id_table, + // function pointers to handle operation on contiguous + // arrays (pointers may be nullptr) + minimum_contig_dispatch_table, + // function pointers to handle operation on strided arrays + // (most general case) + minimum_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - m.def("_square", square_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto square_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - square_output_typeid_vector); + auto minimum_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype 
&dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + minimum_output_id_table); }; - m.def("_square_result_type", square_result_type_pyapi); + m.def("_minimum", minimum_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_minimum_result_type", minimum_result_type_pyapi, ""); } - // U33: ==== SQRT (x) + // B19: ==== MULTIPLY (x1, x2) { - impl::populate_sqrt_dispatch_vectors(); - using impl::sqrt_contig_dispatch_vector; - using impl::sqrt_output_typeid_vector; - using impl::sqrt_strided_dispatch_vector; - - auto sqrt_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, sqrt_output_typeid_vector, - sqrt_contig_dispatch_vector, sqrt_strided_dispatch_vector); - }; - m.def("_sqrt", sqrt_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto sqrt_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, sqrt_output_typeid_vector); - }; - m.def("_sqrt_result_type", sqrt_result_type_pyapi); - } + impl::populate_multiply_dispatch_tables(); + using impl::multiply_contig_dispatch_table; + using impl::multiply_contig_matrix_contig_row_broadcast_dispatch_table; + using impl::multiply_contig_row_contig_matrix_broadcast_dispatch_table; + using impl::multiply_output_id_table; + using impl::multiply_strided_dispatch_table; - // B23: ==== SUBTRACT (x1, x2) - { - impl::populate_subtract_dispatch_tables(); - using impl::subtract_contig_dispatch_table; - using impl::subtract_contig_matrix_contig_row_broadcast_dispatch_table; - using impl::subtract_contig_row_contig_matrix_broadcast_dispatch_table; - using impl::subtract_output_id_table; - using impl::subtract_strided_dispatch_table; - - auto subtract_pyapi = + auto multiply_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, const 
dpctl::tensor::usm_ndarray &src2, const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, subtract_output_id_table, + src1, src2, dst, exec_q, depends, multiply_output_id_table, // function pointers to handle operation on contiguous // arrays (pointers may be nullptr) - subtract_contig_dispatch_table, + multiply_contig_dispatch_table, // function pointers to handle operation on strided arrays // (most general case) - subtract_strided_dispatch_table, + multiply_strided_dispatch_table, // function pointers to handle operation of c-contig matrix // and c-contig row with broadcasting (may be nullptr) - subtract_contig_matrix_contig_row_broadcast_dispatch_table, + multiply_contig_matrix_contig_row_broadcast_dispatch_table, // function pointers to handle operation of c-contig matrix // and c-contig row with broadcasting (may be nullptr) - subtract_contig_row_contig_matrix_broadcast_dispatch_table); + multiply_contig_row_contig_matrix_broadcast_dispatch_table); }; - auto subtract_result_type_pyapi = [&](const py::dtype &dtype1, + auto multiply_result_type_pyapi = [&](const py::dtype &dtype1, const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, - subtract_output_id_table); + multiply_output_id_table); }; - m.def("_subtract", subtract_pyapi, "", py::arg("src1"), py::arg("src2"), + m.def("_multiply", multiply_pyapi, "", py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_subtract_result_type", subtract_result_type_pyapi, ""); + m.def("_multiply_result_type", multiply_result_type_pyapi, ""); - using impl::subtract_inplace_contig_dispatch_table; - using impl::subtract_inplace_row_matrix_dispatch_table; - using impl::subtract_inplace_strided_dispatch_table; + using impl::multiply_inplace_contig_dispatch_table; + using impl::multiply_inplace_row_matrix_dispatch_table; + using 
impl::multiply_inplace_strided_dispatch_table; - auto subtract_inplace_pyapi = + auto multiply_inplace_pyapi = [&](const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_inplace_ufunc( - src, dst, exec_q, depends, subtract_output_id_table, + src, dst, exec_q, depends, multiply_output_id_table, // function pointers to handle inplace operation on // contiguous arrays (pointers may be nullptr) - subtract_inplace_contig_dispatch_table, + multiply_inplace_contig_dispatch_table, // function pointers to handle inplace operation on strided // arrays (most general case) - subtract_inplace_strided_dispatch_table, + multiply_inplace_strided_dispatch_table, // function pointers to handle inplace operation on // c-contig matrix with c-contig row with broadcasting // (may be nullptr) - subtract_inplace_row_matrix_dispatch_table); + multiply_inplace_row_matrix_dispatch_table); }; - m.def("_subtract_inplace", subtract_inplace_pyapi, "", py::arg("lhs"), + m.def("_multiply_inplace", multiply_inplace_pyapi, "", py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), py::arg("depends") = py::list()); } - // U34: ==== TAN (x) - { - impl::populate_tan_dispatch_vectors(); - using impl::tan_contig_dispatch_vector; - using impl::tan_output_typeid_vector; - using impl::tan_strided_dispatch_vector; - - auto tan_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, tan_output_typeid_vector, - tan_contig_dispatch_vector, tan_strided_dispatch_vector); - }; - m.def("_tan", tan_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto tan_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, tan_output_typeid_vector); - }; - m.def("_tan_result_type", tan_result_type_pyapi); - } - - // U35: ==== TANH (x) - 
{ - impl::populate_tanh_dispatch_vectors(); - using impl::tanh_contig_dispatch_vector; - using impl::tanh_output_typeid_vector; - using impl::tanh_strided_dispatch_vector; - - auto tanh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, tanh_output_typeid_vector, - tanh_contig_dispatch_vector, tanh_strided_dispatch_vector); - }; - m.def("_tanh", tanh_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto tanh_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, tanh_output_typeid_vector); - }; - m.def("_tanh_result_type", tanh_result_type_pyapi); - } - - // U36: ==== TRUNC (x) - { - impl::populate_trunc_dispatch_vectors(); - using impl::trunc_contig_dispatch_vector; - using impl::trunc_output_typeid_vector; - using impl::trunc_strided_dispatch_vector; - - auto trunc_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, trunc_output_typeid_vector, - trunc_contig_dispatch_vector, trunc_strided_dispatch_vector); - }; - m.def("_trunc", trunc_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto trunc_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - trunc_output_typeid_vector); - }; - m.def("_trunc_result_type", trunc_result_type_pyapi); - } - - // U39: ==== RSQRT (x) + // U25: ==== NEGATIVE (x) { - impl::populate_rsqrt_dispatch_vectors(); - using impl::rsqrt_contig_dispatch_vector; - using impl::rsqrt_output_typeid_vector; - using impl::rsqrt_strided_dispatch_vector; + impl::populate_negative_dispatch_vectors(); + using impl::negative_contig_dispatch_vector; + using impl::negative_output_typeid_vector; + using impl::negative_strided_dispatch_vector; - auto 
rsqrt_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, rsqrt_output_typeid_vector, - rsqrt_contig_dispatch_vector, rsqrt_strided_dispatch_vector); + auto negative_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + negative_output_typeid_vector, + negative_contig_dispatch_vector, + negative_strided_dispatch_vector); }; - m.def("_rsqrt", rsqrt_pyapi, "", py::arg("src"), py::arg("dst"), + m.def("_negative", negative_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto rsqrt_result_type_pyapi = [&](const py::dtype &dtype) { + auto negative_result_type_pyapi = [&](const py::dtype &dtype) { return py_unary_ufunc_result_type(dtype, - rsqrt_output_typeid_vector); + negative_output_typeid_vector); }; - m.def("_rsqrt_result_type", rsqrt_result_type_pyapi); + m.def("_negative_result_type", negative_result_type_pyapi); } } diff --git a/dpctl/tensor/libtensor/source/elementwise_functions2.cpp b/dpctl/tensor/libtensor/source/elementwise_functions2.cpp index 4385a1e4c5..a274e5f150 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions2.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions2.cpp @@ -49,26 +49,6 @@ #include "kernels/elementwise_functions/bitwise_or.hpp" #include "kernels/elementwise_functions/bitwise_right_shift.hpp" #include "kernels/elementwise_functions/bitwise_xor.hpp" -#include "kernels/elementwise_functions/cbrt.hpp" -#include "kernels/elementwise_functions/ceil.hpp" -#include "kernels/elementwise_functions/conj.hpp" -#include "kernels/elementwise_functions/copysign.hpp" -#include "kernels/elementwise_functions/cos.hpp" -#include "kernels/elementwise_functions/cosh.hpp" -#include "kernels/elementwise_functions/equal.hpp" -#include 
"kernels/elementwise_functions/exp.hpp" -#include "kernels/elementwise_functions/exp2.hpp" -#include "kernels/elementwise_functions/expm1.hpp" -#include "kernels/elementwise_functions/floor.hpp" -#include "kernels/elementwise_functions/floor_divide.hpp" -#include "kernels/elementwise_functions/greater.hpp" -#include "kernels/elementwise_functions/greater_equal.hpp" -#include "kernels/elementwise_functions/hypot.hpp" -#include "kernels/elementwise_functions/imag.hpp" -#include "kernels/elementwise_functions/isfinite.hpp" -#include "kernels/elementwise_functions/isinf.hpp" -#include "kernels/elementwise_functions/isnan.hpp" -#include "kernels/elementwise_functions/true_divide.hpp" namespace dpctl { @@ -821,1649 +801,376 @@ void populate_bitwise_xor_dispatch_tables(void) }; } // namespace impl -// U09: ==== CEIL (x) -namespace impl -{ - -namespace ceil_fn_ns = dpctl::tensor::kernels::ceil; - -static unary_contig_impl_fn_ptr_t ceil_contig_dispatch_vector[td_ns::num_types]; -static int ceil_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - ceil_strided_dispatch_vector[td_ns::num_types]; - -void populate_ceil_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = ceil_fn_ns; - - using fn_ns::CeilContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(ceil_contig_dispatch_vector); - - using fn_ns::CeilStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(ceil_strided_dispatch_vector); - - using fn_ns::CeilTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(ceil_output_typeid_vector); -} - -} // namespace impl - -// U10: ==== CONJ (x) -namespace impl -{ - -namespace conj_fn_ns = dpctl::tensor::kernels::conj; - -static unary_contig_impl_fn_ptr_t conj_contig_dispatch_vector[td_ns::num_types]; -static int conj_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - conj_strided_dispatch_vector[td_ns::num_types]; - -void 
populate_conj_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = conj_fn_ns; - - using fn_ns::ConjContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(conj_contig_dispatch_vector); - - using fn_ns::ConjStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(conj_strided_dispatch_vector); - - using fn_ns::ConjTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(conj_output_typeid_vector); -} -} // namespace impl - -// U11: ==== COS (x) -namespace impl -{ - -namespace cos_fn_ns = dpctl::tensor::kernels::cos; - -static unary_contig_impl_fn_ptr_t cos_contig_dispatch_vector[td_ns::num_types]; -static int cos_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - cos_strided_dispatch_vector[td_ns::num_types]; - -void populate_cos_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = cos_fn_ns; - - using fn_ns::CosContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(cos_contig_dispatch_vector); - - using fn_ns::CosStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(cos_strided_dispatch_vector); - - using fn_ns::CosTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(cos_output_typeid_vector); -} - -} // namespace impl - -// U12: ==== COSH (x) -namespace impl -{ - -namespace cosh_fn_ns = dpctl::tensor::kernels::cosh; - -static unary_contig_impl_fn_ptr_t cosh_contig_dispatch_vector[td_ns::num_types]; -static int cosh_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - cosh_strided_dispatch_vector[td_ns::num_types]; - -void populate_cosh_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = cosh_fn_ns; - - using fn_ns::CoshContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(cosh_contig_dispatch_vector); - - using fn_ns::CoshStridedFactory; - DispatchVectorBuilder - dvb2; - 
dvb2.populate_dispatch_vector(cosh_strided_dispatch_vector); - - using fn_ns::CoshTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(cosh_output_typeid_vector); -} - -} // namespace impl - -// B08: ==== DIVIDE (x1, x2) -namespace impl -{ -namespace true_divide_fn_ns = dpctl::tensor::kernels::true_divide; - -static binary_contig_impl_fn_ptr_t - true_divide_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int true_divide_output_id_table[td_ns::num_types][td_ns::num_types]; -static int true_divide_inplace_output_id_table[td_ns::num_types] - [td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - true_divide_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -// divide(matrix, row) -static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t - true_divide_contig_matrix_contig_row_broadcast_dispatch_table - [td_ns::num_types][td_ns::num_types]; - -// divide(row, matrix) -static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t - true_divide_contig_row_contig_matrix_broadcast_dispatch_table - [td_ns::num_types][td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - true_divide_inplace_contig_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - true_divide_inplace_strided_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t - true_divide_inplace_row_matrix_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -void populate_true_divide_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = true_divide_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::TrueDivideTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(true_divide_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::TrueDivideStridedFactory; - DispatchTableBuilder - dtb2; - 
dtb2.populate_dispatch_table(true_divide_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::TrueDivideContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(true_divide_contig_dispatch_table); - - // function pointers for operation on contiguous matrix, contiguous row - // with contiguous matrix output - using fn_ns::TrueDivideContigMatrixContigRowBroadcastFactory; - DispatchTableBuilder< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, - TrueDivideContigMatrixContigRowBroadcastFactory, num_types> - dtb4; - dtb4.populate_dispatch_table( - true_divide_contig_matrix_contig_row_broadcast_dispatch_table); - - // function pointers for operation on contiguous row, contiguous matrix - // with contiguous matrix output - using fn_ns::TrueDivideContigRowContigMatrixBroadcastFactory; - DispatchTableBuilder< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, - TrueDivideContigRowContigMatrixBroadcastFactory, num_types> - dtb5; - dtb5.populate_dispatch_table( - true_divide_contig_row_contig_matrix_broadcast_dispatch_table); - - // which input types are supported, and what is the type of the result - using fn_ns::TrueDivideInplaceTypeMapFactory; - DispatchTableBuilder dtb6; - dtb6.populate_dispatch_table(true_divide_inplace_output_id_table); - - // function pointers for inplace operation on general strided arrays - using fn_ns::TrueDivideInplaceStridedFactory; - DispatchTableBuilder - dtb7; - dtb7.populate_dispatch_table(true_divide_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::TrueDivideInplaceContigFactory; - DispatchTableBuilder - dtb8; - dtb8.populate_dispatch_table(true_divide_inplace_contig_dispatch_table); - - // function pointers for inplace operation on contiguous matrix - // and contiguous row - using fn_ns::TrueDivideInplaceRowMatrixBroadcastFactory; - DispatchTableBuilder - dtb9; - 
dtb9.populate_dispatch_table(true_divide_inplace_row_matrix_dispatch_table); -}; - -} // namespace impl - -// B09: ==== EQUAL (x1, x2) -namespace impl -{ -namespace equal_fn_ns = dpctl::tensor::kernels::equal; - -static binary_contig_impl_fn_ptr_t - equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int equal_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_equal_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = equal_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::EqualTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(equal_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::EqualStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(equal_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::EqualContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(equal_contig_dispatch_table); -}; -} // namespace impl - -// U13: ==== EXP (x) -namespace impl -{ - -namespace exp_fn_ns = dpctl::tensor::kernels::exp; - -static unary_contig_impl_fn_ptr_t exp_contig_dispatch_vector[td_ns::num_types]; -static int exp_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - exp_strided_dispatch_vector[td_ns::num_types]; - -void populate_exp_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = exp_fn_ns; - - using fn_ns::ExpContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(exp_contig_dispatch_vector); - - using fn_ns::ExpStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(exp_strided_dispatch_vector); - - using fn_ns::ExpTypeMapFactory; - DispatchVectorBuilder dvb3; - 
dvb3.populate_dispatch_vector(exp_output_typeid_vector); -} - -} // namespace impl - -// U14: ==== EXPM1 (x) -namespace impl -{ - -namespace expm1_fn_ns = dpctl::tensor::kernels::expm1; - -static unary_contig_impl_fn_ptr_t - expm1_contig_dispatch_vector[td_ns::num_types]; -static int expm1_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - expm1_strided_dispatch_vector[td_ns::num_types]; - -void populate_expm1_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = expm1_fn_ns; - - using fn_ns::Expm1ContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(expm1_contig_dispatch_vector); - - using fn_ns::Expm1StridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(expm1_strided_dispatch_vector); - - using fn_ns::Expm1TypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(expm1_output_typeid_vector); -} - -} // namespace impl - -// U15: ==== FLOOR (x) -namespace impl -{ - -namespace floor_fn_ns = dpctl::tensor::kernels::floor; - -static unary_contig_impl_fn_ptr_t - floor_contig_dispatch_vector[td_ns::num_types]; -static int floor_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - floor_strided_dispatch_vector[td_ns::num_types]; - -void populate_floor_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = floor_fn_ns; - - using fn_ns::FloorContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(floor_contig_dispatch_vector); - - using fn_ns::FloorStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(floor_strided_dispatch_vector); - - using fn_ns::FloorTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(floor_output_typeid_vector); -} - -} // namespace impl - -// B10: ==== FLOOR_DIVIDE (x1, x2) -namespace impl -{ -namespace floor_divide_fn_ns = dpctl::tensor::kernels::floor_divide; - -static binary_contig_impl_fn_ptr_t - 
floor_divide_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int floor_divide_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - floor_divide_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - floor_divide_inplace_contig_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - floor_divide_inplace_strided_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -void populate_floor_divide_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = floor_divide_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::FloorDivideTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(floor_divide_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::FloorDivideStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(floor_divide_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::FloorDivideContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(floor_divide_contig_dispatch_table); - - // function pointers for inplace operation on general strided arrays - using fn_ns::FloorDivideInplaceStridedFactory; - DispatchTableBuilder - dtb4; - dtb4.populate_dispatch_table(floor_divide_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::FloorDivideInplaceContigFactory; - DispatchTableBuilder - dtb5; - dtb5.populate_dispatch_table(floor_divide_inplace_contig_dispatch_table); -}; - -} // namespace impl - -// B11: ==== GREATER (x1, x2) -namespace impl -{ -namespace greater_fn_ns = dpctl::tensor::kernels::greater; - -static binary_contig_impl_fn_ptr_t - greater_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int 
greater_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - greater_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_greater_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = greater_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::GreaterTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(greater_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::GreaterStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(greater_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::GreaterContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(greater_contig_dispatch_table); -}; -} // namespace impl - -// B12: ==== GREATER_EQUAL (x1, x2) -namespace impl -{ -namespace greater_equal_fn_ns = dpctl::tensor::kernels::greater_equal; - -static binary_contig_impl_fn_ptr_t - greater_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int greater_equal_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - greater_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_greater_equal_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = greater_equal_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::GreaterEqualTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(greater_equal_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::GreaterEqualStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(greater_equal_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::GreaterEqualContigFactory; - 
DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(greater_equal_contig_dispatch_table); -}; -} // namespace impl - -// U16: ==== IMAG (x) -namespace impl -{ - -namespace imag_fn_ns = dpctl::tensor::kernels::imag; - -static unary_contig_impl_fn_ptr_t imag_contig_dispatch_vector[td_ns::num_types]; -static int imag_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - imag_strided_dispatch_vector[td_ns::num_types]; - -void populate_imag_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = imag_fn_ns; - - using fn_ns::ImagContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(imag_contig_dispatch_vector); - - using fn_ns::ImagStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(imag_strided_dispatch_vector); - - using fn_ns::ImagTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(imag_output_typeid_vector); -} -} // namespace impl - -// U17: ==== ISFINITE (x) -namespace impl -{ -namespace isfinite_fn_ns = dpctl::tensor::kernels::isfinite; - -static unary_contig_impl_fn_ptr_t - isfinite_contig_dispatch_vector[td_ns::num_types]; -static int isfinite_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - isfinite_strided_dispatch_vector[td_ns::num_types]; - -void populate_isfinite_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = isfinite_fn_ns; - - using fn_ns::IsFiniteContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(isfinite_contig_dispatch_vector); - - using fn_ns::IsFiniteStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(isfinite_strided_dispatch_vector); - - using fn_ns::IsFiniteTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(isfinite_output_typeid_vector); -} - -} // namespace impl - -// U18: ==== ISINF (x) -namespace impl -{ -namespace isinf_fn_ns = dpctl::tensor::kernels::isinf; - -static 
unary_contig_impl_fn_ptr_t - isinf_contig_dispatch_vector[td_ns::num_types]; -static int isinf_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - isinf_strided_dispatch_vector[td_ns::num_types]; - -void populate_isinf_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = isinf_fn_ns; - - using fn_ns::IsInfContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(isinf_contig_dispatch_vector); - - using fn_ns::IsInfStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(isinf_strided_dispatch_vector); - - using fn_ns::IsInfTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(isinf_output_typeid_vector); -} - -} // namespace impl - -// U19: ==== ISNAN (x) -namespace impl -{ -namespace isnan_fn_ns = dpctl::tensor::kernels::isnan; - -static unary_contig_impl_fn_ptr_t - isnan_contig_dispatch_vector[td_ns::num_types]; -static int isnan_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - isnan_strided_dispatch_vector[td_ns::num_types]; - -void populate_isnan_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = isnan_fn_ns; - - using fn_ns::IsNanContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(isnan_contig_dispatch_vector); - - using fn_ns::IsNanStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(isnan_strided_dispatch_vector); - - using fn_ns::IsNanTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(isnan_output_typeid_vector); -} - -} // namespace impl - -// B24: ==== HYPOT (x1, x2) -namespace impl -{ -namespace hypot_fn_ns = dpctl::tensor::kernels::hypot; - -static binary_contig_impl_fn_ptr_t - hypot_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int hypot_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - hypot_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; 
- -void populate_hypot_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = hypot_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::HypotTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(hypot_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::HypotStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(hypot_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::HypotContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(hypot_contig_dispatch_table); -}; - -} // namespace impl - -// U37: ==== CBRT (x) -namespace impl -{ - -namespace cbrt_fn_ns = dpctl::tensor::kernels::cbrt; - -static unary_contig_impl_fn_ptr_t cbrt_contig_dispatch_vector[td_ns::num_types]; -static int cbrt_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - cbrt_strided_dispatch_vector[td_ns::num_types]; - -void populate_cbrt_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = cbrt_fn_ns; - - using fn_ns::CbrtContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(cbrt_contig_dispatch_vector); - - using fn_ns::CbrtStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(cbrt_strided_dispatch_vector); - - using fn_ns::CbrtTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(cbrt_output_typeid_vector); -} - -} // namespace impl - -// B24: ==== COPYSIGN (x1, x2) -namespace impl -{ -namespace copysign_fn_ns = dpctl::tensor::kernels::copysign; - -static binary_contig_impl_fn_ptr_t - copysign_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int copysign_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - copysign_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void 
populate_copysign_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = copysign_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::CopysignTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(copysign_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::CopysignStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(copysign_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::CopysignContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(copysign_contig_dispatch_table); -}; - -} // namespace impl - -// U38: ==== EXP2 (x) -namespace impl -{ - -namespace exp2_fn_ns = dpctl::tensor::kernels::exp2; - -static unary_contig_impl_fn_ptr_t exp2_contig_dispatch_vector[td_ns::num_types]; -static int exp2_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - exp2_strided_dispatch_vector[td_ns::num_types]; - -void populate_exp2_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = exp2_fn_ns; - - using fn_ns::Exp2ContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(exp2_contig_dispatch_vector); - - using fn_ns::Exp2StridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(exp2_strided_dispatch_vector); - - using fn_ns::Exp2TypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(exp2_output_typeid_vector); -} - -} // namespace impl - -// ========================================================================================== -// // - -namespace py = pybind11; - -void init_elementwise_functions2(py::module_ m) -{ - - using arrayT = dpctl::tensor::usm_ndarray; - using event_vecT = std::vector; - - // U01: ==== ABS (x) - { - impl::populate_abs_dispatch_vectors(); - using impl::abs_contig_dispatch_vector; - using 
impl::abs_output_typeid_vector; - using impl::abs_strided_dispatch_vector; - - auto abs_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, abs_output_typeid_vector, - abs_contig_dispatch_vector, abs_strided_dispatch_vector); - }; - m.def("_abs", abs_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto abs_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, abs_output_typeid_vector); - }; - m.def("_abs_result_type", abs_result_type_pyapi); - } - - // U02: ==== ACOS (x) - { - impl::populate_acos_dispatch_vectors(); - using impl::acos_contig_dispatch_vector; - using impl::acos_output_typeid_vector; - using impl::acos_strided_dispatch_vector; - - auto acos_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, acos_output_typeid_vector, - acos_contig_dispatch_vector, acos_strided_dispatch_vector); - }; - m.def("_acos", acos_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto acos_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, acos_output_typeid_vector); - }; - m.def("_acos_result_type", acos_result_type_pyapi); - } - - // U03: ===== ACOSH (x) - { - impl::populate_acosh_dispatch_vectors(); - using impl::acosh_contig_dispatch_vector; - using impl::acosh_output_typeid_vector; - using impl::acosh_strided_dispatch_vector; - - auto acosh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, acosh_output_typeid_vector, - acosh_contig_dispatch_vector, acosh_strided_dispatch_vector); - }; - m.def("_acosh", acosh_pyapi, "", py::arg("src"), py::arg("dst"), 
- py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto acosh_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - acosh_output_typeid_vector); - }; - m.def("_acosh_result_type", acosh_result_type_pyapi); - } - - // B01: ===== ADD (x1, x2) - { - impl::populate_add_dispatch_tables(); - using impl::add_contig_dispatch_table; - using impl::add_contig_matrix_contig_row_broadcast_dispatch_table; - using impl::add_contig_row_contig_matrix_broadcast_dispatch_table; - using impl::add_output_id_table; - using impl::add_strided_dispatch_table; - - auto add_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, add_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - add_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - add_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - add_contig_matrix_contig_row_broadcast_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - add_contig_row_contig_matrix_broadcast_dispatch_table); - }; - auto add_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - add_output_id_table); - }; - m.def("_add", add_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_add_result_type", add_result_type_pyapi, ""); - - using impl::add_inplace_contig_dispatch_table; - using impl::add_inplace_row_matrix_dispatch_table; - using 
impl::add_inplace_strided_dispatch_table; - - auto add_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, add_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - add_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - add_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - add_inplace_row_matrix_dispatch_table); - }; - m.def("_add_inplace", add_inplace_pyapi, "", py::arg("lhs"), - py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // U04: ===== ASIN (x) - { - impl::populate_asin_dispatch_vectors(); - using impl::asin_contig_dispatch_vector; - using impl::asin_output_typeid_vector; - using impl::asin_strided_dispatch_vector; - - auto asin_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, asin_output_typeid_vector, - asin_contig_dispatch_vector, asin_strided_dispatch_vector); - }; - m.def("_asin", asin_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto asin_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, asin_output_typeid_vector); - }; - m.def("_asin_result_type", asin_result_type_pyapi); - } - - // U05: ===== ASINH (x) - { - impl::populate_asinh_dispatch_vectors(); - using impl::asinh_contig_dispatch_vector; - using impl::asinh_output_typeid_vector; - using impl::asinh_strided_dispatch_vector; - - auto asinh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const 
event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, asinh_output_typeid_vector, - asinh_contig_dispatch_vector, asinh_strided_dispatch_vector); - }; - m.def("_asinh", asinh_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto asinh_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - asinh_output_typeid_vector); - }; - m.def("_asinh_result_type", asinh_result_type_pyapi); - } - - // U06: ===== ATAN (x) - { - impl::populate_atan_dispatch_vectors(); - using impl::atan_contig_dispatch_vector; - using impl::atan_output_typeid_vector; - using impl::atan_strided_dispatch_vector; - - auto atan_pyapi = [&](arrayT src, arrayT dst, sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, atan_output_typeid_vector, - atan_contig_dispatch_vector, atan_strided_dispatch_vector); - }; - m.def("_atan", atan_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto atan_result_type_pyapi = [&](py::dtype dtype) { - return py_unary_ufunc_result_type(dtype, atan_output_typeid_vector); - }; - m.def("_atan_result_type", atan_result_type_pyapi); - } - - // B02: ===== ATAN2 (x1, x2) - { - impl::populate_atan2_dispatch_tables(); - using impl::atan2_contig_dispatch_table; - using impl::atan2_output_id_table; - using impl::atan2_strided_dispatch_table; - - auto atan2_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, atan2_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - atan2_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - 
atan2_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto atan2_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - atan2_output_id_table); - }; - m.def("_atan2", atan2_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_atan2_result_type", atan2_result_type_pyapi, ""); - } - - // U07: ===== ATANH (x) - { - impl::populate_atanh_dispatch_vectors(); - using impl::atanh_contig_dispatch_vector; - using impl::atanh_output_typeid_vector; - using impl::atanh_strided_dispatch_vector; - - auto atanh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, atanh_output_typeid_vector, - atanh_contig_dispatch_vector, atanh_strided_dispatch_vector); - }; - m.def("_atanh", atanh_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto atanh_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - atanh_output_typeid_vector); - }; - m.def("_atanh_result_type", atanh_result_type_pyapi); - } - - // B03: ===== BITWISE_AND (x1, x2) - { - impl::populate_bitwise_and_dispatch_tables(); - using impl::bitwise_and_contig_dispatch_table; - using impl::bitwise_and_output_id_table; - using impl::bitwise_and_strided_dispatch_table; - - auto bitwise_and_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const 
dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, bitwise_and_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - bitwise_and_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - bitwise_and_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto bitwise_and_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - bitwise_and_output_id_table); - }; - m.def("_bitwise_and", bitwise_and_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_bitwise_and_result_type", bitwise_and_result_type_pyapi, ""); - - using impl::bitwise_and_inplace_contig_dispatch_table; - using impl::bitwise_and_inplace_strided_dispatch_table; - - auto bitwise_and_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, bitwise_and_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - bitwise_and_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - bitwise_and_inplace_strided_dispatch_table, - // function pointers to handle inplace 
operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_bitwise_and_inplace", bitwise_and_inplace_pyapi, "", - py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // B04: ===== BITWISE_LEFT_SHIFT (x1, x2) - { - impl::populate_bitwise_left_shift_dispatch_tables(); - using impl::bitwise_left_shift_contig_dispatch_table; - using impl::bitwise_left_shift_output_id_table; - using impl::bitwise_left_shift_strided_dispatch_table; - - auto bitwise_left_shift_pyapi = [&](const dpctl::tensor::usm_ndarray - &src1, - const dpctl::tensor::usm_ndarray - &src2, - const dpctl::tensor::usm_ndarray - &dst, - sycl::queue &exec_q, - const std::vector - &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, - bitwise_left_shift_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - bitwise_left_shift_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - bitwise_left_shift_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto bitwise_left_shift_result_type_pyapi = - [&](const py::dtype &dtype1, const py::dtype &dtype2) { - return py_binary_ufunc_result_type( - dtype1, dtype2, bitwise_left_shift_output_id_table); - }; - m.def("_bitwise_left_shift", bitwise_left_shift_pyapi, "", - py::arg("src1"), py::arg("src2"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - 
m.def("_bitwise_left_shift_result_type", - bitwise_left_shift_result_type_pyapi, ""); +// ========================================================================================== +// // - using impl::bitwise_left_shift_inplace_contig_dispatch_table; - using impl::bitwise_left_shift_inplace_strided_dispatch_table; +namespace py = pybind11; - auto bitwise_left_shift_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, - bitwise_left_shift_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - bitwise_left_shift_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - bitwise_left_shift_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_bitwise_left_shift_inplace", bitwise_left_shift_inplace_pyapi, - "", py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } +void init_elementwise_functions2(py::module_ m) +{ - // U08: ===== BITWISE_INVERT (x) + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + // U01: ==== ABS (x) { - impl::populate_bitwise_invert_dispatch_vectors(); - using impl::bitwise_invert_contig_dispatch_vector; - using impl::bitwise_invert_output_typeid_vector; - using impl::bitwise_invert_strided_dispatch_vector; + impl::populate_abs_dispatch_vectors(); + using impl::abs_contig_dispatch_vector; + using impl::abs_output_typeid_vector; + using impl::abs_strided_dispatch_vector; - auto bitwise_invert_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const 
event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, - bitwise_invert_output_typeid_vector, - bitwise_invert_contig_dispatch_vector, - bitwise_invert_strided_dispatch_vector); + auto abs_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, abs_output_typeid_vector, + abs_contig_dispatch_vector, abs_strided_dispatch_vector); }; - m.def("_bitwise_invert", bitwise_invert_pyapi, "", py::arg("src"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); + m.def("_abs", abs_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto bitwise_invert_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type( - dtype, bitwise_invert_output_typeid_vector); + auto abs_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, abs_output_typeid_vector); }; - m.def("_bitwise_invert_result_type", bitwise_invert_result_type_pyapi); + m.def("_abs_result_type", abs_result_type_pyapi); } - // B05: ===== BITWISE_OR (x1, x2) + // U02: ==== ACOS (x) { - impl::populate_bitwise_or_dispatch_tables(); - using impl::bitwise_or_contig_dispatch_table; - using impl::bitwise_or_output_id_table; - using impl::bitwise_or_strided_dispatch_table; + impl::populate_acos_dispatch_vectors(); + using impl::acos_contig_dispatch_vector; + using impl::acos_output_typeid_vector; + using impl::acos_strided_dispatch_vector; - auto bitwise_or_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, bitwise_or_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - 
bitwise_or_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - bitwise_or_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto bitwise_or_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - bitwise_or_output_id_table); + auto acos_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, acos_output_typeid_vector, + acos_contig_dispatch_vector, acos_strided_dispatch_vector); }; - m.def("_bitwise_or", bitwise_or_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_bitwise_or_result_type", bitwise_or_result_type_pyapi, ""); - - using impl::bitwise_or_inplace_contig_dispatch_table; - using impl::bitwise_or_inplace_strided_dispatch_table; + m.def("_acos", acos_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto bitwise_or_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, bitwise_or_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - bitwise_or_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - 
bitwise_or_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_bitwise_or_inplace", bitwise_or_inplace_pyapi, "", - py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); + auto acos_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, acos_output_typeid_vector); + }; + m.def("_acos_result_type", acos_result_type_pyapi); } - // B06: ===== BITWISE_RIGHT_SHIFT (x1, x2) + // U03: ===== ACOSH (x) { - impl::populate_bitwise_right_shift_dispatch_tables(); - using impl::bitwise_right_shift_contig_dispatch_table; - using impl::bitwise_right_shift_output_id_table; - using impl::bitwise_right_shift_strided_dispatch_table; + impl::populate_acosh_dispatch_vectors(); + using impl::acosh_contig_dispatch_vector; + using impl::acosh_output_typeid_vector; + using impl::acosh_strided_dispatch_vector; - auto bitwise_right_shift_pyapi = [&](const dpctl::tensor::usm_ndarray - &src1, - const dpctl::tensor::usm_ndarray - &src2, - const dpctl::tensor::usm_ndarray - &dst, - sycl::queue &exec_q, - const std::vector - &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, - bitwise_right_shift_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - bitwise_right_shift_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - bitwise_right_shift_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be 
nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + auto acosh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, acosh_output_typeid_vector, + acosh_contig_dispatch_vector, acosh_strided_dispatch_vector); }; - auto bitwise_right_shift_result_type_pyapi = - [&](const py::dtype &dtype1, const py::dtype &dtype2) { - return py_binary_ufunc_result_type( - dtype1, dtype2, bitwise_right_shift_output_id_table); - }; - m.def("_bitwise_right_shift", bitwise_right_shift_pyapi, "", - py::arg("src1"), py::arg("src2"), py::arg("dst"), + m.def("_acosh", acosh_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_bitwise_right_shift_result_type", - bitwise_right_shift_result_type_pyapi, ""); - - using impl::bitwise_right_shift_inplace_contig_dispatch_table; - using impl::bitwise_right_shift_inplace_strided_dispatch_table; - auto bitwise_right_shift_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, - bitwise_right_shift_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - bitwise_right_shift_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - bitwise_right_shift_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_bitwise_right_shift_inplace", bitwise_right_shift_inplace_pyapi, - "", py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") 
= py::list()); + auto acosh_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + acosh_output_typeid_vector); + }; + m.def("_acosh_result_type", acosh_result_type_pyapi); } - // B07: ===== BITWISE_XOR (x1, x2) + // B01: ===== ADD (x1, x2) { - impl::populate_bitwise_xor_dispatch_tables(); - using impl::bitwise_xor_contig_dispatch_table; - using impl::bitwise_xor_output_id_table; - using impl::bitwise_xor_strided_dispatch_table; + impl::populate_add_dispatch_tables(); + using impl::add_contig_dispatch_table; + using impl::add_contig_matrix_contig_row_broadcast_dispatch_table; + using impl::add_contig_row_contig_matrix_broadcast_dispatch_table; + using impl::add_output_id_table; + using impl::add_strided_dispatch_table; - auto bitwise_xor_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { + auto add_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, bitwise_xor_output_id_table, + src1, src2, dst, exec_q, depends, add_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - bitwise_xor_contig_dispatch_table, + add_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - bitwise_xor_strided_dispatch_table, + add_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + add_contig_matrix_contig_row_broadcast_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting 
(may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + add_contig_row_contig_matrix_broadcast_dispatch_table); }; - auto bitwise_xor_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { + auto add_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, - bitwise_xor_output_id_table); + add_output_id_table); }; - m.def("_bitwise_xor", bitwise_xor_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + m.def("_add", add_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_bitwise_xor_result_type", bitwise_xor_result_type_pyapi, ""); + m.def("_add_result_type", add_result_type_pyapi, ""); - using impl::bitwise_xor_inplace_contig_dispatch_table; - using impl::bitwise_xor_inplace_strided_dispatch_table; + using impl::add_inplace_contig_dispatch_table; + using impl::add_inplace_row_matrix_dispatch_table; + using impl::add_inplace_strided_dispatch_table; - auto bitwise_xor_inplace_pyapi = + auto add_inplace_pyapi = [&](const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_inplace_ufunc( - src, dst, exec_q, depends, bitwise_xor_output_id_table, + src, dst, exec_q, depends, add_output_id_table, // function pointers to handle inplace operation on // contiguous arrays (pointers may be nullptr) - bitwise_xor_inplace_contig_dispatch_table, + add_inplace_contig_dispatch_table, // function pointers to handle inplace operation on strided // arrays (most general case) - bitwise_xor_inplace_strided_dispatch_table, + add_inplace_strided_dispatch_table, // function pointers to handle inplace operation on // c-contig matrix with c-contig row with broadcasting // (may be nullptr) - td_ns::NullPtrTable< - 
binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + add_inplace_row_matrix_dispatch_table); }; - m.def("_bitwise_xor_inplace", bitwise_xor_inplace_pyapi, "", - py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), + m.def("_add_inplace", add_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), py::arg("depends") = py::list()); } - // U09: ==== CEIL (x) + // U04: ===== ASIN (x) { - impl::populate_ceil_dispatch_vectors(); - using impl::ceil_contig_dispatch_vector; - using impl::ceil_output_typeid_vector; - using impl::ceil_strided_dispatch_vector; + impl::populate_asin_dispatch_vectors(); + using impl::asin_contig_dispatch_vector; + using impl::asin_output_typeid_vector; + using impl::asin_strided_dispatch_vector; - auto ceil_pyapi = [&](const arrayT &src, const arrayT &dst, + auto asin_pyapi = [&](const arrayT &src, const arrayT &dst, sycl::queue &exec_q, const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, ceil_output_typeid_vector, - ceil_contig_dispatch_vector, ceil_strided_dispatch_vector); + src, dst, exec_q, depends, asin_output_typeid_vector, + asin_contig_dispatch_vector, asin_strided_dispatch_vector); }; - m.def("_ceil", ceil_pyapi, "", py::arg("src"), py::arg("dst"), + m.def("_asin", asin_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto ceil_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, ceil_output_typeid_vector); + auto asin_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, asin_output_typeid_vector); }; - m.def("_ceil_result_type", ceil_result_type_pyapi); + m.def("_asin_result_type", asin_result_type_pyapi); } - // U10: ==== CONJ (x) + // U05: ===== ASINH (x) { - impl::populate_conj_dispatch_vectors(); - using impl::conj_contig_dispatch_vector; - using impl::conj_output_typeid_vector; - using impl::conj_strided_dispatch_vector; + 
impl::populate_asinh_dispatch_vectors(); + using impl::asinh_contig_dispatch_vector; + using impl::asinh_output_typeid_vector; + using impl::asinh_strided_dispatch_vector; - auto conj_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { + auto asinh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, conj_output_typeid_vector, - conj_contig_dispatch_vector, conj_strided_dispatch_vector); + src, dst, exec_q, depends, asinh_output_typeid_vector, + asinh_contig_dispatch_vector, asinh_strided_dispatch_vector); }; - m.def("_conj", conj_pyapi, "", py::arg("src"), py::arg("dst"), + m.def("_asinh", asinh_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto conj_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, conj_output_typeid_vector); + auto asinh_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + asinh_output_typeid_vector); }; - m.def("_conj_result_type", conj_result_type_pyapi); + m.def("_asinh_result_type", asinh_result_type_pyapi); } - // U11: ==== COS (x) + // U06: ===== ATAN (x) { - impl::populate_cos_dispatch_vectors(); - using impl::cos_contig_dispatch_vector; - using impl::cos_output_typeid_vector; - using impl::cos_strided_dispatch_vector; + impl::populate_atan_dispatch_vectors(); + using impl::atan_contig_dispatch_vector; + using impl::atan_output_typeid_vector; + using impl::atan_strided_dispatch_vector; - auto cos_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { + auto atan_pyapi = [&](arrayT src, arrayT dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, cos_output_typeid_vector, - cos_contig_dispatch_vector, 
cos_strided_dispatch_vector); + src, dst, exec_q, depends, atan_output_typeid_vector, + atan_contig_dispatch_vector, atan_strided_dispatch_vector); }; - m.def("_cos", cos_pyapi, "", py::arg("src"), py::arg("dst"), + m.def("_atan", atan_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto cos_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, cos_output_typeid_vector); + auto atan_result_type_pyapi = [&](py::dtype dtype) { + return py_unary_ufunc_result_type(dtype, atan_output_typeid_vector); }; - m.def("_cos_result_type", cos_result_type_pyapi); + m.def("_atan_result_type", atan_result_type_pyapi); } - // U12: ==== COSH (x) + // B02: ===== ATAN2 (x1, x2) { - impl::populate_cosh_dispatch_vectors(); - using impl::cosh_contig_dispatch_vector; - using impl::cosh_output_typeid_vector; - using impl::cosh_strided_dispatch_vector; + impl::populate_atan2_dispatch_tables(); + using impl::atan2_contig_dispatch_table; + using impl::atan2_output_id_table; + using impl::atan2_strided_dispatch_table; - auto cosh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { + auto atan2_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, atan2_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + atan2_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + atan2_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle 
operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto atan2_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + atan2_output_id_table); + }; + m.def("_atan2", atan2_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_atan2_result_type", atan2_result_type_pyapi, ""); + } + + // U07: ===== ATANH (x) + { + impl::populate_atanh_dispatch_vectors(); + using impl::atanh_contig_dispatch_vector; + using impl::atanh_output_typeid_vector; + using impl::atanh_strided_dispatch_vector; + + auto atanh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { return py_unary_ufunc( - src, dst, exec_q, depends, cosh_output_typeid_vector, - cosh_contig_dispatch_vector, cosh_strided_dispatch_vector); + src, dst, exec_q, depends, atanh_output_typeid_vector, + atanh_contig_dispatch_vector, atanh_strided_dispatch_vector); }; - m.def("_cosh", cosh_pyapi, "", py::arg("src"), py::arg("dst"), + m.def("_atanh", atanh_pyapi, "", py::arg("src"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - auto cosh_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, cosh_output_typeid_vector); + auto atanh_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + atanh_output_typeid_vector); }; - m.def("_cosh_result_type", cosh_result_type_pyapi); + m.def("_atanh_result_type", atanh_result_type_pyapi); } - // B08: ==== DIVIDE (x1, x2) + // B03: ===== BITWISE_AND (x1, x2) { - impl::populate_true_divide_dispatch_tables(); - using impl::true_divide_contig_dispatch_table; - using impl:: - true_divide_contig_matrix_contig_row_broadcast_dispatch_table; - using impl:: - 
true_divide_contig_row_contig_matrix_broadcast_dispatch_table; - using impl::true_divide_output_id_table; - using impl::true_divide_strided_dispatch_table; - - auto divide_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { + impl::populate_bitwise_and_dispatch_tables(); + using impl::bitwise_and_contig_dispatch_table; + using impl::bitwise_and_output_id_table; + using impl::bitwise_and_strided_dispatch_table; + + auto bitwise_and_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, true_divide_output_id_table, + src1, src2, dst, exec_q, depends, bitwise_and_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - true_divide_contig_dispatch_table, + bitwise_and_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - true_divide_strided_dispatch_table, + bitwise_and_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) - true_divide_contig_matrix_contig_row_broadcast_dispatch_table, + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) - true_divide_contig_row_contig_matrix_broadcast_dispatch_table); + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto divide_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { + auto bitwise_and_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return 
py_binary_ufunc_result_type(dtype1, dtype2, - true_divide_output_id_table); + bitwise_and_output_id_table); }; - m.def("_divide", divide_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), + m.def("_bitwise_and", bitwise_and_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_divide_result_type", divide_result_type_pyapi, ""); + m.def("_bitwise_and_result_type", bitwise_and_result_type_pyapi, ""); - using impl::true_divide_inplace_contig_dispatch_table; - using impl::true_divide_inplace_output_id_table; - using impl::true_divide_inplace_row_matrix_dispatch_table; - using impl::true_divide_inplace_strided_dispatch_table; + using impl::bitwise_and_inplace_contig_dispatch_table; + using impl::bitwise_and_inplace_strided_dispatch_table; - auto divide_inplace_pyapi = + auto bitwise_and_inplace_pyapi = [&](const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_inplace_ufunc( - src, dst, exec_q, depends, - true_divide_inplace_output_id_table, + src, dst, exec_q, depends, bitwise_and_output_id_table, // function pointers to handle inplace operation on // contiguous arrays (pointers may be nullptr) - true_divide_inplace_contig_dispatch_table, + bitwise_and_inplace_contig_dispatch_table, // function pointers to handle inplace operation on strided // arrays (most general case) - true_divide_inplace_strided_dispatch_table, + bitwise_and_inplace_strided_dispatch_table, // function pointers to handle inplace operation on // c-contig matrix with c-contig row with broadcasting // (may be nullptr) - true_divide_inplace_row_matrix_dispatch_table); + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); }; - m.def("_divide_inplace", divide_inplace_pyapi, "", py::arg("lhs"), - py::arg("rhs"), py::arg("sycl_queue"), + m.def("_bitwise_and_inplace", 
bitwise_and_inplace_pyapi, "", + py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), py::arg("depends") = py::list()); } - // B09: ==== EQUAL (x1, x2) + // B04: ===== BITWISE_LEFT_SHIFT (x1, x2) { - impl::populate_equal_dispatch_tables(); - using impl::equal_contig_dispatch_table; - using impl::equal_output_id_table; - using impl::equal_strided_dispatch_table; + impl::populate_bitwise_left_shift_dispatch_tables(); + using impl::bitwise_left_shift_contig_dispatch_table; + using impl::bitwise_left_shift_output_id_table; + using impl::bitwise_left_shift_strided_dispatch_table; - auto equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { + auto bitwise_left_shift_pyapi = [&](const dpctl::tensor::usm_ndarray + &src1, + const dpctl::tensor::usm_ndarray + &src2, + const dpctl::tensor::usm_ndarray + &dst, + sycl::queue &exec_q, + const std::vector + &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, equal_output_id_table, + src1, src2, dst, exec_q, depends, + bitwise_left_shift_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - equal_contig_dispatch_table, + bitwise_left_shift_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - equal_strided_dispatch_table, + bitwise_left_shift_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) td_ns::NullPtrTable< @@ -2473,109 +1180,91 @@ void init_elementwise_functions2(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto equal_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - equal_output_id_table); - }; - m.def("_equal", 
equal_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_equal_result_type", equal_result_type_pyapi, ""); - } - - // U13: ==== EXP (x) - { - impl::populate_exp_dispatch_vectors(); - using impl::exp_contig_dispatch_vector; - using impl::exp_output_typeid_vector; - using impl::exp_strided_dispatch_vector; - - auto exp_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, exp_output_typeid_vector, - exp_contig_dispatch_vector, exp_strided_dispatch_vector); - }; - m.def("_exp", exp_pyapi, "", py::arg("src"), py::arg("dst"), + auto bitwise_left_shift_result_type_pyapi = + [&](const py::dtype &dtype1, const py::dtype &dtype2) { + return py_binary_ufunc_result_type( + dtype1, dtype2, bitwise_left_shift_output_id_table); + }; + m.def("_bitwise_left_shift", bitwise_left_shift_pyapi, "", + py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_bitwise_left_shift_result_type", + bitwise_left_shift_result_type_pyapi, ""); - auto exp_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, exp_output_typeid_vector); - }; - m.def("_exp_result_type", exp_result_type_pyapi); - } - - // U14: ==== EXPM1 (x) - { - impl::populate_expm1_dispatch_vectors(); - using impl::expm1_contig_dispatch_vector; - using impl::expm1_output_typeid_vector; - using impl::expm1_strided_dispatch_vector; - - auto expm1_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, expm1_output_typeid_vector, - expm1_contig_dispatch_vector, expm1_strided_dispatch_vector); - }; - m.def("_expm1", expm1_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); + using 
impl::bitwise_left_shift_inplace_contig_dispatch_table; + using impl::bitwise_left_shift_inplace_strided_dispatch_table; - auto expm1_result_type_pyapi = [&](const py::dtype dtype) { - return py_unary_ufunc_result_type(dtype, - expm1_output_typeid_vector); - }; - m.def("_expm1_result_type", expm1_result_type_pyapi); + auto bitwise_left_shift_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, + bitwise_left_shift_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + bitwise_left_shift_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + bitwise_left_shift_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_bitwise_left_shift_inplace", bitwise_left_shift_inplace_pyapi, + "", py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); } - // U15: ==== FLOOR (x) + // U08: ===== BITWISE_INVERT (x) { - impl::populate_floor_dispatch_vectors(); - using impl::floor_contig_dispatch_vector; - using impl::floor_output_typeid_vector; - using impl::floor_strided_dispatch_vector; + impl::populate_bitwise_invert_dispatch_vectors(); + using impl::bitwise_invert_contig_dispatch_vector; + using impl::bitwise_invert_output_typeid_vector; + using impl::bitwise_invert_strided_dispatch_vector; - auto floor_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, floor_output_typeid_vector, - floor_contig_dispatch_vector, 
floor_strided_dispatch_vector); + auto bitwise_invert_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + bitwise_invert_output_typeid_vector, + bitwise_invert_contig_dispatch_vector, + bitwise_invert_strided_dispatch_vector); }; - m.def("_floor", floor_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_bitwise_invert", bitwise_invert_pyapi, "", py::arg("src"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); - auto floor_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - floor_output_typeid_vector); + auto bitwise_invert_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type( + dtype, bitwise_invert_output_typeid_vector); }; - m.def("_floor_result_type", floor_result_type_pyapi); + m.def("_bitwise_invert_result_type", bitwise_invert_result_type_pyapi); } - // B10: ==== FLOOR_DIVIDE (x1, x2) + // B05: ===== BITWISE_OR (x1, x2) { - impl::populate_floor_divide_dispatch_tables(); - using impl::floor_divide_contig_dispatch_table; - using impl::floor_divide_output_id_table; - using impl::floor_divide_strided_dispatch_table; - - auto floor_divide_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { + impl::populate_bitwise_or_dispatch_tables(); + using impl::bitwise_or_contig_dispatch_table; + using impl::bitwise_or_output_id_table; + using impl::bitwise_or_strided_dispatch_table; + + auto bitwise_or_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, 
floor_divide_output_id_table, + src1, src2, dst, exec_q, depends, bitwise_or_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - floor_divide_contig_dispatch_table, + bitwise_or_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - floor_divide_strided_dispatch_table, + bitwise_or_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) td_ns::NullPtrTable< @@ -2585,103 +1274,67 @@ void init_elementwise_functions2(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto floor_divide_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { + auto bitwise_or_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, - floor_divide_output_id_table); + bitwise_or_output_id_table); }; - m.def("_floor_divide", floor_divide_pyapi, "", py::arg("src1"), + m.def("_bitwise_or", bitwise_or_pyapi, "", py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_floor_divide_result_type", floor_divide_result_type_pyapi, ""); + m.def("_bitwise_or_result_type", bitwise_or_result_type_pyapi, ""); - using impl::floor_divide_inplace_contig_dispatch_table; - using impl::floor_divide_inplace_strided_dispatch_table; + using impl::bitwise_or_inplace_contig_dispatch_table; + using impl::bitwise_or_inplace_strided_dispatch_table; - auto floor_divide_inplace_pyapi = + auto bitwise_or_inplace_pyapi = [&](const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { return py_binary_inplace_ufunc( - src, dst, exec_q, depends, floor_divide_output_id_table, + src, dst, exec_q, depends, bitwise_or_output_id_table, // function pointers to 
handle inplace operation on // contiguous arrays (pointers may be nullptr) - floor_divide_inplace_contig_dispatch_table, + bitwise_or_inplace_contig_dispatch_table, // function pointers to handle inplace operation on strided // arrays (most general case) - floor_divide_inplace_strided_dispatch_table, + bitwise_or_inplace_strided_dispatch_table, // function pointers to handle inplace operation on // c-contig matrix with c-contig row with broadcasting // (may be nullptr) td_ns::NullPtrTable< binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); }; - m.def("_floor_divide_inplace", floor_divide_inplace_pyapi, "", + m.def("_bitwise_or_inplace", bitwise_or_inplace_pyapi, "", py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), py::arg("depends") = py::list()); } - // B11: ==== GREATER (x1, x2) + // B06: ===== BITWISE_RIGHT_SHIFT (x1, x2) { - impl::populate_greater_dispatch_tables(); - using impl::greater_contig_dispatch_table; - using impl::greater_output_id_table; - using impl::greater_strided_dispatch_table; - - auto greater_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, greater_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - greater_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - greater_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto 
greater_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - greater_output_id_table); - }; - m.def("_greater", greater_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_greater_result_type", greater_result_type_pyapi, ""); - } + impl::populate_bitwise_right_shift_dispatch_tables(); + using impl::bitwise_right_shift_contig_dispatch_table; + using impl::bitwise_right_shift_output_id_table; + using impl::bitwise_right_shift_strided_dispatch_table; - // B12: ==== GREATER_EQUAL (x1, x2) - { - impl::populate_greater_equal_dispatch_tables(); - using impl::greater_equal_contig_dispatch_table; - using impl::greater_equal_output_id_table; - using impl::greater_equal_strided_dispatch_table; - - auto greater_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { + auto bitwise_right_shift_pyapi = [&](const dpctl::tensor::usm_ndarray + &src1, + const dpctl::tensor::usm_ndarray + &src2, + const dpctl::tensor::usm_ndarray + &dst, + sycl::queue &exec_q, + const std::vector + &depends = {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, greater_equal_output_id_table, + src1, src2, dst, exec_q, depends, + bitwise_right_shift_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - greater_equal_contig_dispatch_table, + bitwise_right_shift_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - greater_equal_strided_dispatch_table, + bitwise_right_shift_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) td_ns::NullPtrTable< @@ -2691,198 +1344,65 @@ void 
init_elementwise_functions2(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto greater_equal_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - greater_equal_output_id_table); - }; - m.def("_greater_equal", greater_equal_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_greater_equal_result_type", greater_equal_result_type_pyapi, - ""); - } - - // U16: ==== IMAG (x) - { - impl::populate_imag_dispatch_vectors(); - using impl::imag_contig_dispatch_vector; - using impl::imag_output_typeid_vector; - using impl::imag_strided_dispatch_vector; - - auto imag_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, imag_output_typeid_vector, - imag_contig_dispatch_vector, imag_strided_dispatch_vector); - }; - m.def("_imag", imag_pyapi, "", py::arg("src"), py::arg("dst"), + auto bitwise_right_shift_result_type_pyapi = + [&](const py::dtype &dtype1, const py::dtype &dtype2) { + return py_binary_ufunc_result_type( + dtype1, dtype2, bitwise_right_shift_output_id_table); + }; + m.def("_bitwise_right_shift", bitwise_right_shift_pyapi, "", + py::arg("src1"), py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_bitwise_right_shift_result_type", + bitwise_right_shift_result_type_pyapi, ""); - auto imag_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, imag_output_typeid_vector); - }; - m.def("_imag_result_type", imag_result_type_pyapi); - } - - // U17: ==== ISFINITE (x) - { - impl::populate_isfinite_dispatch_vectors(); + using impl::bitwise_right_shift_inplace_contig_dispatch_table; + using impl::bitwise_right_shift_inplace_strided_dispatch_table; - using 
impl::isfinite_contig_dispatch_vector; - using impl::isfinite_output_typeid_vector; - using impl::isfinite_strided_dispatch_vector; - auto isfinite_pyapi = + auto bitwise_right_shift_inplace_pyapi = [&](const dpctl::tensor::usm_ndarray &src, const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, const std::vector &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, - isfinite_output_typeid_vector, - isfinite_contig_dispatch_vector, - isfinite_strided_dispatch_vector); + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, + bitwise_right_shift_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + bitwise_right_shift_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + bitwise_right_shift_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto isfinite_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - isfinite_output_typeid_vector); - }; - m.def("_isfinite", isfinite_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_isfinite_result_type", isfinite_result_type_pyapi, ""); - } - - // U18: ==== ISINF (x) - { - impl::populate_isinf_dispatch_vectors(); - - using impl::isinf_contig_dispatch_vector; - using impl::isinf_output_typeid_vector; - using impl::isinf_strided_dispatch_vector; - auto isinf_pyapi = [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, isinf_output_typeid_vector, - isinf_contig_dispatch_vector, isinf_strided_dispatch_vector); - }; - auto 
isinf_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - isinf_output_typeid_vector); - }; - m.def("_isinf", isinf_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_isinf_result_type", isinf_result_type_pyapi, ""); - } - - // U19: ==== ISNAN (x) - { - impl::populate_isnan_dispatch_vectors(); - - using impl::isnan_contig_dispatch_vector; - using impl::isnan_output_typeid_vector; - using impl::isnan_strided_dispatch_vector; - auto isnan_pyapi = [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, isnan_output_typeid_vector, - isnan_contig_dispatch_vector, isnan_strided_dispatch_vector); - }; - auto isnan_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - isnan_output_typeid_vector); - }; - m.def("_isnan", isnan_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_isnan_result_type", isnan_result_type_pyapi, ""); - } - - // B24: ==== HYPOT (x1, x2) - { - impl::populate_hypot_dispatch_tables(); - using impl::hypot_contig_dispatch_table; - using impl::hypot_output_id_table; - using impl::hypot_strided_dispatch_table; - - auto hypot_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, hypot_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - hypot_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - hypot_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // 
c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto hypot_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - hypot_output_id_table); - }; - m.def("_hypot", hypot_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), + m.def("_bitwise_right_shift_inplace", bitwise_right_shift_inplace_pyapi, + "", py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_hypot_result_type", hypot_result_type_pyapi, ""); } - // U37: ==== CBRT (x) + // B07: ===== BITWISE_XOR (x1, x2) { - impl::populate_cbrt_dispatch_vectors(); - using impl::cbrt_contig_dispatch_vector; - using impl::cbrt_output_typeid_vector; - using impl::cbrt_strided_dispatch_vector; - - auto cbrt_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, cbrt_output_typeid_vector, - cbrt_contig_dispatch_vector, cbrt_strided_dispatch_vector); - }; - m.def("_cbrt", cbrt_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto cbrt_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, cbrt_output_typeid_vector); - }; - m.def("_cbrt_result_type", cbrt_result_type_pyapi); - } + impl::populate_bitwise_xor_dispatch_tables(); + using impl::bitwise_xor_contig_dispatch_table; + using impl::bitwise_xor_output_id_table; + using impl::bitwise_xor_strided_dispatch_table; - // B25: ==== COPYSIGN (x1, x2) - { - impl::populate_copysign_dispatch_tables(); - using 
impl::copysign_contig_dispatch_table; - using impl::copysign_output_id_table; - using impl::copysign_strided_dispatch_table; - - auto copysign_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { + auto bitwise_xor_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { return py_binary_ufunc( - src1, src2, dst, exec_q, depends, copysign_output_id_table, + src1, src2, dst, exec_q, depends, bitwise_xor_output_id_table, // function pointers to handle operation on contiguous arrays // (pointers may be nullptr) - copysign_contig_dispatch_table, + bitwise_xor_contig_dispatch_table, // function pointers to handle operation on strided arrays (most // general case) - copysign_strided_dispatch_table, + bitwise_xor_strided_dispatch_table, // function pointers to handle operation of c-contig matrix and // c-contig row with broadcasting (may be nullptr) td_ns::NullPtrTable< @@ -2892,38 +1412,40 @@ void init_elementwise_functions2(py::module_ m) td_ns::NullPtrTable< binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); }; - auto copysign_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { + auto bitwise_xor_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { return py_binary_ufunc_result_type(dtype1, dtype2, - copysign_output_id_table); + bitwise_xor_output_id_table); }; - m.def("_copysign", copysign_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), + m.def("_bitwise_xor", bitwise_xor_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_copysign_result_type", copysign_result_type_pyapi, ""); - } - - // U38: ==== EXP2 (x) - { - 
impl::populate_exp2_dispatch_vectors(); - using impl::exp2_contig_dispatch_vector; - using impl::exp2_output_typeid_vector; - using impl::exp2_strided_dispatch_vector; + m.def("_bitwise_xor_result_type", bitwise_xor_result_type_pyapi, ""); - auto exp2_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, exp2_output_typeid_vector, - exp2_contig_dispatch_vector, exp2_strided_dispatch_vector); - }; - m.def("_exp2", exp2_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); + using impl::bitwise_xor_inplace_contig_dispatch_table; + using impl::bitwise_xor_inplace_strided_dispatch_table; - auto exp2_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, exp2_output_typeid_vector); - }; - m.def("_exp2_result_type", exp2_result_type_pyapi); + auto bitwise_xor_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, bitwise_xor_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + bitwise_xor_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + bitwise_xor_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_bitwise_xor_inplace", bitwise_xor_inplace_pyapi, "", + py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); } } diff --git a/dpctl/tensor/libtensor/source/elementwise_functions3.cpp 
b/dpctl/tensor/libtensor/source/elementwise_functions3.cpp new file mode 100644 index 0000000000..6c1ed91506 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions3.cpp @@ -0,0 +1,1407 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "elementwise_functions3.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/not_equal.hpp" +#include "kernels/elementwise_functions/positive.hpp" +#include "kernels/elementwise_functions/pow.hpp" +#include "kernels/elementwise_functions/proj.hpp" +#include "kernels/elementwise_functions/real.hpp" +#include "kernels/elementwise_functions/remainder.hpp" +#include "kernels/elementwise_functions/round.hpp" +#include "kernels/elementwise_functions/rsqrt.hpp" +#include "kernels/elementwise_functions/sign.hpp" +#include "kernels/elementwise_functions/signbit.hpp" +#include "kernels/elementwise_functions/sin.hpp" +#include "kernels/elementwise_functions/sinh.hpp" +#include "kernels/elementwise_functions/sqrt.hpp" +#include "kernels/elementwise_functions/square.hpp" +#include "kernels/elementwise_functions/subtract.hpp" +#include "kernels/elementwise_functions/tan.hpp" +#include "kernels/elementwise_functions/tanh.hpp" +#include "kernels/elementwise_functions/trunc.hpp" + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B20: ==== NOT_EQUAL (x1, x2) +namespace impl +{ +namespace 
not_equal_fn_ns = dpctl::tensor::kernels::not_equal; + +static binary_contig_impl_fn_ptr_t + not_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int not_equal_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + not_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_not_equal_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = not_equal_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::NotEqualTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(not_equal_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::NotEqualStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(not_equal_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::NotEqualContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(not_equal_contig_dispatch_table); +}; +} // namespace impl + +// U26: ==== POSITIVE (x) +namespace impl +{ + +namespace positive_fn_ns = dpctl::tensor::kernels::positive; + +static unary_contig_impl_fn_ptr_t + positive_contig_dispatch_vector[td_ns::num_types]; +static int positive_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + positive_strided_dispatch_vector[td_ns::num_types]; + +void populate_positive_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = positive_fn_ns; + + using fn_ns::PositiveContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(positive_contig_dispatch_vector); + + using fn_ns::PositiveStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(positive_strided_dispatch_vector); + + using fn_ns::PositiveTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(positive_output_typeid_vector); +} + +} // namespace impl 
+ +// B21: ==== POW (x1, x2) +namespace impl +{ + +namespace pow_fn_ns = dpctl::tensor::kernels::pow; + +static binary_contig_impl_fn_ptr_t pow_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static int pow_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + pow_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + pow_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + pow_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_pow_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = pow_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::PowTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(pow_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::PowStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(pow_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::PowContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(pow_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::PowInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table(pow_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::PowInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(pow_inplace_contig_dispatch_table); +}; + +} // namespace impl + +// U??: ==== PROJ (x) +namespace impl +{ + +namespace proj_fn_ns = dpctl::tensor::kernels::proj; + +static unary_contig_impl_fn_ptr_t proj_contig_dispatch_vector[td_ns::num_types]; +static int proj_output_typeid_vector[td_ns::num_types]; 
+static unary_strided_impl_fn_ptr_t + proj_strided_dispatch_vector[td_ns::num_types]; + +void populate_proj_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = proj_fn_ns; + + using fn_ns::ProjContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(proj_contig_dispatch_vector); + + using fn_ns::ProjStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(proj_strided_dispatch_vector); + + using fn_ns::ProjTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(proj_output_typeid_vector); +} +} // namespace impl + +// U27: ==== REAL (x) +namespace impl +{ + +namespace real_fn_ns = dpctl::tensor::kernels::real; + +static unary_contig_impl_fn_ptr_t real_contig_dispatch_vector[td_ns::num_types]; +static int real_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + real_strided_dispatch_vector[td_ns::num_types]; + +void populate_real_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = real_fn_ns; + + using fn_ns::RealContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(real_contig_dispatch_vector); + + using fn_ns::RealStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(real_strided_dispatch_vector); + + using fn_ns::RealTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(real_output_typeid_vector); +} +} // namespace impl + +// B22: ==== REMAINDER (x1, x2) +namespace impl +{ + +namespace remainder_fn_ns = dpctl::tensor::kernels::remainder; + +static binary_contig_impl_fn_ptr_t + remainder_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int remainder_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + remainder_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + remainder_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static 
binary_inplace_strided_impl_fn_ptr_t + remainder_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_remainder_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = remainder_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::RemainderTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(remainder_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::RemainderStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(remainder_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::RemainderContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(remainder_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::RemainderInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table(remainder_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::RemainderInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(remainder_inplace_contig_dispatch_table); +} + +} // namespace impl + +// U28: ==== ROUND (x) +namespace impl +{ + +namespace round_fn_ns = dpctl::tensor::kernels::round; + +static unary_contig_impl_fn_ptr_t + round_contig_dispatch_vector[td_ns::num_types]; +static int round_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + round_strided_dispatch_vector[td_ns::num_types]; + +void populate_round_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = round_fn_ns; + + using fn_ns::RoundContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(round_contig_dispatch_vector); + + using fn_ns::RoundStridedFactory; + DispatchVectorBuilder + dvb2; + 
dvb2.populate_dispatch_vector(round_strided_dispatch_vector); + + using fn_ns::RoundTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(round_output_typeid_vector); +} + +} // namespace impl + +// U29: ==== SIGN (x) +namespace impl +{ + +namespace sign_fn_ns = dpctl::tensor::kernels::sign; + +static unary_contig_impl_fn_ptr_t sign_contig_dispatch_vector[td_ns::num_types]; +static int sign_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + sign_strided_dispatch_vector[td_ns::num_types]; + +void populate_sign_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = sign_fn_ns; + + using fn_ns::SignContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(sign_contig_dispatch_vector); + + using fn_ns::SignStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(sign_strided_dispatch_vector); + + using fn_ns::SignTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(sign_output_typeid_vector); +} + +} // namespace impl + +// ==== SIGNBIT (x) +namespace impl +{ + +namespace signbit_fn_ns = dpctl::tensor::kernels::signbit; + +static unary_contig_impl_fn_ptr_t + signbit_contig_dispatch_vector[td_ns::num_types]; +static int signbit_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + signbit_strided_dispatch_vector[td_ns::num_types]; + +void populate_signbit_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = signbit_fn_ns; + + using fn_ns::SignbitContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(signbit_contig_dispatch_vector); + + using fn_ns::SignbitStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(signbit_strided_dispatch_vector); + + using fn_ns::SignbitTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(signbit_output_typeid_vector); +} + +} // namespace impl + +// U30: ==== SIN (x) +namespace impl +{ + 
+namespace sin_fn_ns = dpctl::tensor::kernels::sin; + +static unary_contig_impl_fn_ptr_t sin_contig_dispatch_vector[td_ns::num_types]; +static int sin_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + sin_strided_dispatch_vector[td_ns::num_types]; + +void populate_sin_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = sin_fn_ns; + + using fn_ns::SinContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(sin_contig_dispatch_vector); + + using fn_ns::SinStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(sin_strided_dispatch_vector); + + using fn_ns::SinTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(sin_output_typeid_vector); +} + +} // namespace impl + +// U31: ==== SINH (x) +namespace impl +{ + +namespace sinh_fn_ns = dpctl::tensor::kernels::sinh; + +static unary_contig_impl_fn_ptr_t sinh_contig_dispatch_vector[td_ns::num_types]; +static int sinh_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + sinh_strided_dispatch_vector[td_ns::num_types]; + +void populate_sinh_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = sinh_fn_ns; + + using fn_ns::SinhContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(sinh_contig_dispatch_vector); + + using fn_ns::SinhStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(sinh_strided_dispatch_vector); + + using fn_ns::SinhTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(sinh_output_typeid_vector); +} + +} // namespace impl + +// U32: ==== SQUARE (x) +namespace impl +{ + +namespace square_fn_ns = dpctl::tensor::kernels::square; + +static unary_contig_impl_fn_ptr_t + square_contig_dispatch_vector[td_ns::num_types]; +static int square_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + square_strided_dispatch_vector[td_ns::num_types]; + +void 
populate_square_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = square_fn_ns; + + using fn_ns::SquareContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(square_contig_dispatch_vector); + + using fn_ns::SquareStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(square_strided_dispatch_vector); + + using fn_ns::SquareTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(square_output_typeid_vector); +} + +} // namespace impl + +// U33: ==== SQRT (x) +namespace impl +{ + +namespace sqrt_fn_ns = dpctl::tensor::kernels::sqrt; + +static unary_contig_impl_fn_ptr_t sqrt_contig_dispatch_vector[td_ns::num_types]; +static int sqrt_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + sqrt_strided_dispatch_vector[td_ns::num_types]; + +void populate_sqrt_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = sqrt_fn_ns; + + using fn_ns::SqrtContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(sqrt_contig_dispatch_vector); + + using fn_ns::SqrtStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(sqrt_strided_dispatch_vector); + + using fn_ns::SqrtTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(sqrt_output_typeid_vector); +} + +} // namespace impl + +// B23: ==== SUBTRACT (x1, x2) +namespace impl +{ +namespace subtract_fn_ns = dpctl::tensor::kernels::subtract; + +static binary_contig_impl_fn_ptr_t + subtract_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int subtract_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + subtract_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +// sub(matrix, row) +static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t + subtract_contig_matrix_contig_row_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +// sub(row, matrix) 
+static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t + subtract_contig_row_contig_matrix_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + subtract_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + subtract_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t + subtract_inplace_row_matrix_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_subtract_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = subtract_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::SubtractTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(subtract_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::SubtractStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(subtract_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::SubtractContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(subtract_contig_dispatch_table); + + // function pointers for operation on contiguous matrix, contiguous row + // with contiguous matrix output + using fn_ns::SubtractContigMatrixContigRowBroadcastFactory; + DispatchTableBuilder< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, + SubtractContigMatrixContigRowBroadcastFactory, num_types> + dtb4; + dtb4.populate_dispatch_table( + subtract_contig_matrix_contig_row_broadcast_dispatch_table); + + // function pointers for operation on contiguous row, contiguous matrix + // with contiguous matrix output + using fn_ns::SubtractContigRowContigMatrixBroadcastFactory; + DispatchTableBuilder< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, + SubtractContigRowContigMatrixBroadcastFactory, 
num_types> + dtb5; + dtb5.populate_dispatch_table( + subtract_contig_row_contig_matrix_broadcast_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::SubtractInplaceStridedFactory; + DispatchTableBuilder + dtb6; + dtb6.populate_dispatch_table(subtract_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::SubtractInplaceContigFactory; + DispatchTableBuilder + dtb7; + dtb7.populate_dispatch_table(subtract_inplace_contig_dispatch_table); + + // function pointers for inplace operation on contiguous matrix + // and contiguous row + using fn_ns::SubtractInplaceRowMatrixBroadcastFactory; + DispatchTableBuilder + dtb8; + dtb8.populate_dispatch_table(subtract_inplace_row_matrix_dispatch_table); +}; + +} // namespace impl + +// U34: ==== TAN (x) +namespace impl +{ + +namespace tan_fn_ns = dpctl::tensor::kernels::tan; + +static unary_contig_impl_fn_ptr_t tan_contig_dispatch_vector[td_ns::num_types]; +static int tan_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + tan_strided_dispatch_vector[td_ns::num_types]; + +void populate_tan_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = tan_fn_ns; + + using fn_ns::TanContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(tan_contig_dispatch_vector); + + using fn_ns::TanStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(tan_strided_dispatch_vector); + + using fn_ns::TanTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(tan_output_typeid_vector); +} + +} // namespace impl + +// U35: ==== TANH (x) +namespace impl +{ + +namespace tanh_fn_ns = dpctl::tensor::kernels::tanh; + +static unary_contig_impl_fn_ptr_t tanh_contig_dispatch_vector[td_ns::num_types]; +static int tanh_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + 
tanh_strided_dispatch_vector[td_ns::num_types]; + +void populate_tanh_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = tanh_fn_ns; + + using fn_ns::TanhContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(tanh_contig_dispatch_vector); + + using fn_ns::TanhStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(tanh_strided_dispatch_vector); + + using fn_ns::TanhTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(tanh_output_typeid_vector); +} + +} // namespace impl + +// U36: ==== TRUNC (x) +namespace impl +{ + +namespace trunc_fn_ns = dpctl::tensor::kernels::trunc; + +static unary_contig_impl_fn_ptr_t + trunc_contig_dispatch_vector[td_ns::num_types]; +static int trunc_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + trunc_strided_dispatch_vector[td_ns::num_types]; + +void populate_trunc_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = trunc_fn_ns; + + using fn_ns::TruncContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(trunc_contig_dispatch_vector); + + using fn_ns::TruncStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(trunc_strided_dispatch_vector); + + using fn_ns::TruncTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(trunc_output_typeid_vector); +} + +} // namespace impl + +// U39: ==== RSQRT (x) +namespace impl +{ + +namespace rsqrt_fn_ns = dpctl::tensor::kernels::rsqrt; + +static unary_contig_impl_fn_ptr_t + rsqrt_contig_dispatch_vector[td_ns::num_types]; +static int rsqrt_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + rsqrt_strided_dispatch_vector[td_ns::num_types]; + +void populate_rsqrt_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = rsqrt_fn_ns; + + using fn_ns::RsqrtContigFactory; + DispatchVectorBuilder + dvb1; + 
dvb1.populate_dispatch_vector(rsqrt_contig_dispatch_vector); + + using fn_ns::RsqrtStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(rsqrt_strided_dispatch_vector); + + using fn_ns::RsqrtTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(rsqrt_output_typeid_vector); +} + +} // namespace impl + +// ========================================================================================== +// // + +namespace py = pybind11; + +void init_elementwise_functions3(py::module_ m) +{ + + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + // B20: ==== NOT_EQUAL (x1, x2) + { + impl::populate_not_equal_dispatch_tables(); + using impl::not_equal_contig_dispatch_table; + using impl::not_equal_output_id_table; + using impl::not_equal_strided_dispatch_table; + + auto not_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, not_equal_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + not_equal_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + not_equal_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto not_equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + not_equal_output_id_table); + }; + 
m.def("_not_equal", not_equal_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_not_equal_result_type", not_equal_result_type_pyapi, ""); + } + + // U26: ==== POSITIVE (x) + { + impl::populate_positive_dispatch_vectors(); + using impl::positive_contig_dispatch_vector; + using impl::positive_output_typeid_vector; + using impl::positive_strided_dispatch_vector; + + auto positive_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + positive_output_typeid_vector, + positive_contig_dispatch_vector, + positive_strided_dispatch_vector); + }; + m.def("_positive", positive_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto positive_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + positive_output_typeid_vector); + }; + m.def("_positive_result_type", positive_result_type_pyapi); + } + + // B21: ==== POW (x1, x2) + { + impl::populate_pow_dispatch_tables(); + using impl::pow_contig_dispatch_table; + using impl::pow_output_id_table; + using impl::pow_strided_dispatch_table; + + auto pow_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, pow_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + pow_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + pow_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + 
binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto pow_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + pow_output_id_table); + }; + m.def("_pow", pow_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_pow_result_type", pow_result_type_pyapi, ""); + + using impl::pow_inplace_contig_dispatch_table; + using impl::pow_inplace_strided_dispatch_table; + + auto pow_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, pow_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + pow_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + pow_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_pow_inplace", pow_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } + + // U??: ==== PROJ (x) + { + impl::populate_proj_dispatch_vectors(); + using impl::proj_contig_dispatch_vector; + using impl::proj_output_typeid_vector; + using impl::proj_strided_dispatch_vector; + + auto proj_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, 
depends, proj_output_typeid_vector, + proj_contig_dispatch_vector, proj_strided_dispatch_vector); + }; + m.def("_proj", proj_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto proj_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, proj_output_typeid_vector); + }; + m.def("_proj_result_type", proj_result_type_pyapi); + } + + // U27: ==== REAL (x) + { + impl::populate_real_dispatch_vectors(); + using impl::real_contig_dispatch_vector; + using impl::real_output_typeid_vector; + using impl::real_strided_dispatch_vector; + + auto real_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, real_output_typeid_vector, + real_contig_dispatch_vector, real_strided_dispatch_vector); + }; + m.def("_real", real_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto real_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, real_output_typeid_vector); + }; + m.def("_real_result_type", real_result_type_pyapi); + } + + // B22: ==== REMAINDER (x1, x2) + { + impl::populate_remainder_dispatch_tables(); + using impl::remainder_contig_dispatch_table; + using impl::remainder_output_id_table; + using impl::remainder_strided_dispatch_table; + + auto remainder_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, remainder_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + remainder_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + remainder_strided_dispatch_table, + 
// function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto remainder_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + remainder_output_id_table); + }; + m.def("_remainder", remainder_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_remainder_result_type", remainder_result_type_pyapi, ""); + + using impl::remainder_inplace_contig_dispatch_table; + using impl::remainder_inplace_strided_dispatch_table; + + auto remainder_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, remainder_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + remainder_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + remainder_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_remainder_inplace", remainder_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } + + // U28: ==== ROUND (x) + { + impl::populate_round_dispatch_vectors(); + using impl::round_contig_dispatch_vector; + using 
impl::round_output_typeid_vector; + using impl::round_strided_dispatch_vector; + + auto round_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, round_output_typeid_vector, + round_contig_dispatch_vector, round_strided_dispatch_vector); + }; + m.def("_round", round_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto round_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + round_output_typeid_vector); + }; + m.def("_round_result_type", round_result_type_pyapi); + } + + // U29: ==== SIGN (x) + { + impl::populate_sign_dispatch_vectors(); + using impl::sign_contig_dispatch_vector; + using impl::sign_output_typeid_vector; + using impl::sign_strided_dispatch_vector; + + auto sign_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, sign_output_typeid_vector, + sign_contig_dispatch_vector, sign_strided_dispatch_vector); + }; + m.def("_sign", sign_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto sign_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, sign_output_typeid_vector); + }; + m.def("_sign_result_type", sign_result_type_pyapi); + } + + // ==== SIGNBIT (x) + { + impl::populate_signbit_dispatch_vectors(); + using impl::signbit_contig_dispatch_vector; + using impl::signbit_output_typeid_vector; + using impl::signbit_strided_dispatch_vector; + + auto signbit_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + signbit_output_typeid_vector, + signbit_contig_dispatch_vector, + signbit_strided_dispatch_vector); + }; + m.def("_signbit", 
signbit_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto signbit_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + signbit_output_typeid_vector); + }; + m.def("_signbit_result_type", signbit_result_type_pyapi); + } + + // U30: ==== SIN (x) + { + impl::populate_sin_dispatch_vectors(); + using impl::sin_contig_dispatch_vector; + using impl::sin_output_typeid_vector; + using impl::sin_strided_dispatch_vector; + + auto sin_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, sin_output_typeid_vector, + sin_contig_dispatch_vector, sin_strided_dispatch_vector); + }; + m.def("_sin", sin_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto sin_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, sin_output_typeid_vector); + }; + m.def("_sin_result_type", sin_result_type_pyapi); + } + // U31: ==== SINH (x) + { + impl::populate_sinh_dispatch_vectors(); + using impl::sinh_contig_dispatch_vector; + using impl::sinh_output_typeid_vector; + using impl::sinh_strided_dispatch_vector; + + auto sinh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, sinh_output_typeid_vector, + sinh_contig_dispatch_vector, sinh_strided_dispatch_vector); + }; + m.def("_sinh", sinh_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto sinh_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, sinh_output_typeid_vector); + }; + m.def("_sinh_result_type", sinh_result_type_pyapi); + } + + // U32: ==== SQUARE (x) + { + impl::populate_square_dispatch_vectors(); + using 
impl::square_contig_dispatch_vector; + using impl::square_output_typeid_vector; + using impl::square_strided_dispatch_vector; + + auto square_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, square_output_typeid_vector, + square_contig_dispatch_vector, square_strided_dispatch_vector); + }; + m.def("_square", square_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto square_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + square_output_typeid_vector); + }; + m.def("_square_result_type", square_result_type_pyapi); + } + + // U33: ==== SQRT (x) + { + impl::populate_sqrt_dispatch_vectors(); + using impl::sqrt_contig_dispatch_vector; + using impl::sqrt_output_typeid_vector; + using impl::sqrt_strided_dispatch_vector; + + auto sqrt_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, sqrt_output_typeid_vector, + sqrt_contig_dispatch_vector, sqrt_strided_dispatch_vector); + }; + m.def("_sqrt", sqrt_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto sqrt_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, sqrt_output_typeid_vector); + }; + m.def("_sqrt_result_type", sqrt_result_type_pyapi); + } + + // B23: ==== SUBTRACT (x1, x2) + { + impl::populate_subtract_dispatch_tables(); + using impl::subtract_contig_dispatch_table; + using impl::subtract_contig_matrix_contig_row_broadcast_dispatch_table; + using impl::subtract_contig_row_contig_matrix_broadcast_dispatch_table; + using impl::subtract_output_id_table; + using impl::subtract_strided_dispatch_table; + + auto subtract_pyapi = + [&](const dpctl::tensor::usm_ndarray &src1, + const 
dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, subtract_output_id_table, + // function pointers to handle operation on contiguous + // arrays (pointers may be nullptr) + subtract_contig_dispatch_table, + // function pointers to handle operation on strided arrays + // (most general case) + subtract_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + subtract_contig_matrix_contig_row_broadcast_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + subtract_contig_row_contig_matrix_broadcast_dispatch_table); + }; + auto subtract_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + subtract_output_id_table); + }; + m.def("_subtract", subtract_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_subtract_result_type", subtract_result_type_pyapi, ""); + + using impl::subtract_inplace_contig_dispatch_table; + using impl::subtract_inplace_row_matrix_dispatch_table; + using impl::subtract_inplace_strided_dispatch_table; + + auto subtract_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, subtract_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + subtract_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + subtract_inplace_strided_dispatch_table, + // function pointers to handle inplace 
operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + subtract_inplace_row_matrix_dispatch_table); + }; + m.def("_subtract_inplace", subtract_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } + + // U34: ==== TAN (x) + { + impl::populate_tan_dispatch_vectors(); + using impl::tan_contig_dispatch_vector; + using impl::tan_output_typeid_vector; + using impl::tan_strided_dispatch_vector; + + auto tan_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, tan_output_typeid_vector, + tan_contig_dispatch_vector, tan_strided_dispatch_vector); + }; + m.def("_tan", tan_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto tan_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, tan_output_typeid_vector); + }; + m.def("_tan_result_type", tan_result_type_pyapi); + } + + // U35: ==== TANH (x) + { + impl::populate_tanh_dispatch_vectors(); + using impl::tanh_contig_dispatch_vector; + using impl::tanh_output_typeid_vector; + using impl::tanh_strided_dispatch_vector; + + auto tanh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, tanh_output_typeid_vector, + tanh_contig_dispatch_vector, tanh_strided_dispatch_vector); + }; + m.def("_tanh", tanh_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto tanh_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, tanh_output_typeid_vector); + }; + m.def("_tanh_result_type", tanh_result_type_pyapi); + } + + // U36: ==== TRUNC (x) + { + impl::populate_trunc_dispatch_vectors(); + using impl::trunc_contig_dispatch_vector; + using 
impl::trunc_output_typeid_vector; + using impl::trunc_strided_dispatch_vector; + + auto trunc_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, trunc_output_typeid_vector, + trunc_contig_dispatch_vector, trunc_strided_dispatch_vector); + }; + m.def("_trunc", trunc_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto trunc_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + trunc_output_typeid_vector); + }; + m.def("_trunc_result_type", trunc_result_type_pyapi); + } + + // U39: ==== RSQRT (x) + { + impl::populate_rsqrt_dispatch_vectors(); + using impl::rsqrt_contig_dispatch_vector; + using impl::rsqrt_output_typeid_vector; + using impl::rsqrt_strided_dispatch_vector; + + auto rsqrt_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, rsqrt_output_typeid_vector, + rsqrt_contig_dispatch_vector, rsqrt_strided_dispatch_vector); + }; + m.def("_rsqrt", rsqrt_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto rsqrt_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + rsqrt_output_typeid_vector); + }; + m.def("_rsqrt_result_type", rsqrt_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions3.hpp b/dpctl/tensor/libtensor/source/elementwise_functions3.hpp new file mode 100644 index 0000000000..c5f7feab4f --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions3.hpp @@ -0,0 +1,40 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel 
Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_elementwise_functions3(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions4.cpp b/dpctl/tensor/libtensor/source/elementwise_functions4.cpp new file mode 100644 index 0000000000..dabaa1cb39 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions4.cpp @@ -0,0 +1,1551 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "elementwise_functions4.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/cbrt.hpp" +#include "kernels/elementwise_functions/ceil.hpp" +#include "kernels/elementwise_functions/conj.hpp" +#include "kernels/elementwise_functions/copysign.hpp" +#include "kernels/elementwise_functions/cos.hpp" +#include "kernels/elementwise_functions/cosh.hpp" +#include "kernels/elementwise_functions/equal.hpp" +#include "kernels/elementwise_functions/exp.hpp" +#include "kernels/elementwise_functions/exp2.hpp" +#include "kernels/elementwise_functions/expm1.hpp" +#include "kernels/elementwise_functions/floor.hpp" +#include "kernels/elementwise_functions/floor_divide.hpp" +#include "kernels/elementwise_functions/greater.hpp" +#include "kernels/elementwise_functions/greater_equal.hpp" +#include "kernels/elementwise_functions/hypot.hpp" +#include "kernels/elementwise_functions/imag.hpp" +#include "kernels/elementwise_functions/isfinite.hpp" +#include "kernels/elementwise_functions/isinf.hpp" +#include "kernels/elementwise_functions/isnan.hpp" +#include "kernels/elementwise_functions/true_divide.hpp" + 
+namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// U09: ==== CEIL (x) +namespace impl +{ + +namespace ceil_fn_ns = dpctl::tensor::kernels::ceil; + +static unary_contig_impl_fn_ptr_t ceil_contig_dispatch_vector[td_ns::num_types]; +static int ceil_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + ceil_strided_dispatch_vector[td_ns::num_types]; + +void populate_ceil_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = ceil_fn_ns; + + using fn_ns::CeilContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(ceil_contig_dispatch_vector); + + using fn_ns::CeilStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(ceil_strided_dispatch_vector); + + using fn_ns::CeilTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(ceil_output_typeid_vector); +} + +} // namespace impl + +// U10: ==== CONJ (x) +namespace impl +{ + +namespace conj_fn_ns = dpctl::tensor::kernels::conj; + +static unary_contig_impl_fn_ptr_t conj_contig_dispatch_vector[td_ns::num_types]; +static int conj_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + conj_strided_dispatch_vector[td_ns::num_types]; + +void populate_conj_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = conj_fn_ns; + + using 
fn_ns::ConjContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(conj_contig_dispatch_vector); + + using fn_ns::ConjStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(conj_strided_dispatch_vector); + + using fn_ns::ConjTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(conj_output_typeid_vector); +} +} // namespace impl + +// U11: ==== COS (x) +namespace impl +{ + +namespace cos_fn_ns = dpctl::tensor::kernels::cos; + +static unary_contig_impl_fn_ptr_t cos_contig_dispatch_vector[td_ns::num_types]; +static int cos_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + cos_strided_dispatch_vector[td_ns::num_types]; + +void populate_cos_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = cos_fn_ns; + + using fn_ns::CosContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(cos_contig_dispatch_vector); + + using fn_ns::CosStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(cos_strided_dispatch_vector); + + using fn_ns::CosTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(cos_output_typeid_vector); +} + +} // namespace impl + +// U12: ==== COSH (x) +namespace impl +{ + +namespace cosh_fn_ns = dpctl::tensor::kernels::cosh; + +static unary_contig_impl_fn_ptr_t cosh_contig_dispatch_vector[td_ns::num_types]; +static int cosh_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + cosh_strided_dispatch_vector[td_ns::num_types]; + +void populate_cosh_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = cosh_fn_ns; + + using fn_ns::CoshContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(cosh_contig_dispatch_vector); + + using fn_ns::CoshStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(cosh_strided_dispatch_vector); + + using fn_ns::CoshTypeMapFactory; + DispatchVectorBuilder dvb3; + 
dvb3.populate_dispatch_vector(cosh_output_typeid_vector); +} + +} // namespace impl + +// B08: ==== DIVIDE (x1, x2) +namespace impl +{ +namespace true_divide_fn_ns = dpctl::tensor::kernels::true_divide; + +static binary_contig_impl_fn_ptr_t + true_divide_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int true_divide_output_id_table[td_ns::num_types][td_ns::num_types]; +static int true_divide_inplace_output_id_table[td_ns::num_types] + [td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + true_divide_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +// divide(matrix, row) +static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t + true_divide_contig_matrix_contig_row_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +// divide(row, matrix) +static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t + true_divide_contig_row_contig_matrix_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + true_divide_inplace_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + true_divide_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t + true_divide_inplace_row_matrix_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_true_divide_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = true_divide_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::TrueDivideTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(true_divide_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::TrueDivideStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(true_divide_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using 
fn_ns::TrueDivideContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(true_divide_contig_dispatch_table); + + // function pointers for operation on contiguous matrix, contiguous row + // with contiguous matrix output + using fn_ns::TrueDivideContigMatrixContigRowBroadcastFactory; + DispatchTableBuilder< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, + TrueDivideContigMatrixContigRowBroadcastFactory, num_types> + dtb4; + dtb4.populate_dispatch_table( + true_divide_contig_matrix_contig_row_broadcast_dispatch_table); + + // function pointers for operation on contiguous row, contiguous matrix + // with contiguous matrix output + using fn_ns::TrueDivideContigRowContigMatrixBroadcastFactory; + DispatchTableBuilder< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, + TrueDivideContigRowContigMatrixBroadcastFactory, num_types> + dtb5; + dtb5.populate_dispatch_table( + true_divide_contig_row_contig_matrix_broadcast_dispatch_table); + + // which input types are supported, and what is the type of the result + using fn_ns::TrueDivideInplaceTypeMapFactory; + DispatchTableBuilder dtb6; + dtb6.populate_dispatch_table(true_divide_inplace_output_id_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::TrueDivideInplaceStridedFactory; + DispatchTableBuilder + dtb7; + dtb7.populate_dispatch_table(true_divide_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::TrueDivideInplaceContigFactory; + DispatchTableBuilder + dtb8; + dtb8.populate_dispatch_table(true_divide_inplace_contig_dispatch_table); + + // function pointers for inplace operation on contiguous matrix + // and contiguous row + using fn_ns::TrueDivideInplaceRowMatrixBroadcastFactory; + DispatchTableBuilder + dtb9; + dtb9.populate_dispatch_table(true_divide_inplace_row_matrix_dispatch_table); +}; + +} // namespace impl + +// B09: ==== EQUAL (x1, x2) +namespace impl 
+{ +namespace equal_fn_ns = dpctl::tensor::kernels::equal; + +static binary_contig_impl_fn_ptr_t + equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int equal_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_equal_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = equal_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::EqualTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(equal_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::EqualStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(equal_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::EqualContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(equal_contig_dispatch_table); +}; +} // namespace impl + +// U13: ==== EXP (x) +namespace impl +{ + +namespace exp_fn_ns = dpctl::tensor::kernels::exp; + +static unary_contig_impl_fn_ptr_t exp_contig_dispatch_vector[td_ns::num_types]; +static int exp_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + exp_strided_dispatch_vector[td_ns::num_types]; + +void populate_exp_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = exp_fn_ns; + + using fn_ns::ExpContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(exp_contig_dispatch_vector); + + using fn_ns::ExpStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(exp_strided_dispatch_vector); + + using fn_ns::ExpTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(exp_output_typeid_vector); +} + +} // namespace impl + +// U14: ==== EXPM1 (x) +namespace impl +{ + +namespace expm1_fn_ns = dpctl::tensor::kernels::expm1; + 
+static unary_contig_impl_fn_ptr_t + expm1_contig_dispatch_vector[td_ns::num_types]; +static int expm1_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + expm1_strided_dispatch_vector[td_ns::num_types]; + +void populate_expm1_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = expm1_fn_ns; + + using fn_ns::Expm1ContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(expm1_contig_dispatch_vector); + + using fn_ns::Expm1StridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(expm1_strided_dispatch_vector); + + using fn_ns::Expm1TypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(expm1_output_typeid_vector); +} + +} // namespace impl + +// U15: ==== FLOOR (x) +namespace impl +{ + +namespace floor_fn_ns = dpctl::tensor::kernels::floor; + +static unary_contig_impl_fn_ptr_t + floor_contig_dispatch_vector[td_ns::num_types]; +static int floor_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + floor_strided_dispatch_vector[td_ns::num_types]; + +void populate_floor_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = floor_fn_ns; + + using fn_ns::FloorContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(floor_contig_dispatch_vector); + + using fn_ns::FloorStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(floor_strided_dispatch_vector); + + using fn_ns::FloorTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(floor_output_typeid_vector); +} + +} // namespace impl + +// B10: ==== FLOOR_DIVIDE (x1, x2) +namespace impl +{ +namespace floor_divide_fn_ns = dpctl::tensor::kernels::floor_divide; + +static binary_contig_impl_fn_ptr_t + floor_divide_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int floor_divide_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + 
floor_divide_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + floor_divide_inplace_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + floor_divide_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_floor_divide_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = floor_divide_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::FloorDivideTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(floor_divide_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::FloorDivideStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(floor_divide_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::FloorDivideContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(floor_divide_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::FloorDivideInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table(floor_divide_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::FloorDivideInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(floor_divide_inplace_contig_dispatch_table); +}; + +} // namespace impl + +// B11: ==== GREATER (x1, x2) +namespace impl +{ +namespace greater_fn_ns = dpctl::tensor::kernels::greater; + +static binary_contig_impl_fn_ptr_t + greater_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int greater_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + greater_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void 
populate_greater_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = greater_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::GreaterTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(greater_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::GreaterStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(greater_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::GreaterContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(greater_contig_dispatch_table); +}; +} // namespace impl + +// B12: ==== GREATER_EQUAL (x1, x2) +namespace impl +{ +namespace greater_equal_fn_ns = dpctl::tensor::kernels::greater_equal; + +static binary_contig_impl_fn_ptr_t + greater_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int greater_equal_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + greater_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_greater_equal_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = greater_equal_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::GreaterEqualTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(greater_equal_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::GreaterEqualStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(greater_equal_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::GreaterEqualContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(greater_equal_contig_dispatch_table); +}; +} // namespace impl + +// U16: ==== IMAG (x) +namespace impl +{ + +namespace 
imag_fn_ns = dpctl::tensor::kernels::imag; + +static unary_contig_impl_fn_ptr_t imag_contig_dispatch_vector[td_ns::num_types]; +static int imag_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + imag_strided_dispatch_vector[td_ns::num_types]; + +void populate_imag_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = imag_fn_ns; + + using fn_ns::ImagContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(imag_contig_dispatch_vector); + + using fn_ns::ImagStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(imag_strided_dispatch_vector); + + using fn_ns::ImagTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(imag_output_typeid_vector); +} +} // namespace impl + +// U17: ==== ISFINITE (x) +namespace impl +{ +namespace isfinite_fn_ns = dpctl::tensor::kernels::isfinite; + +static unary_contig_impl_fn_ptr_t + isfinite_contig_dispatch_vector[td_ns::num_types]; +static int isfinite_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + isfinite_strided_dispatch_vector[td_ns::num_types]; + +void populate_isfinite_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = isfinite_fn_ns; + + using fn_ns::IsFiniteContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(isfinite_contig_dispatch_vector); + + using fn_ns::IsFiniteStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(isfinite_strided_dispatch_vector); + + using fn_ns::IsFiniteTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(isfinite_output_typeid_vector); +} + +} // namespace impl + +// U18: ==== ISINF (x) +namespace impl +{ +namespace isinf_fn_ns = dpctl::tensor::kernels::isinf; + +static unary_contig_impl_fn_ptr_t + isinf_contig_dispatch_vector[td_ns::num_types]; +static int isinf_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + 
isinf_strided_dispatch_vector[td_ns::num_types]; + +void populate_isinf_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = isinf_fn_ns; + + using fn_ns::IsInfContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(isinf_contig_dispatch_vector); + + using fn_ns::IsInfStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(isinf_strided_dispatch_vector); + + using fn_ns::IsInfTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(isinf_output_typeid_vector); +} + +} // namespace impl + +// U19: ==== ISNAN (x) +namespace impl +{ +namespace isnan_fn_ns = dpctl::tensor::kernels::isnan; + +static unary_contig_impl_fn_ptr_t + isnan_contig_dispatch_vector[td_ns::num_types]; +static int isnan_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + isnan_strided_dispatch_vector[td_ns::num_types]; + +void populate_isnan_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = isnan_fn_ns; + + using fn_ns::IsNanContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(isnan_contig_dispatch_vector); + + using fn_ns::IsNanStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(isnan_strided_dispatch_vector); + + using fn_ns::IsNanTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(isnan_output_typeid_vector); +} + +} // namespace impl + +// B24: ==== HYPOT (x1, x2) +namespace impl +{ +namespace hypot_fn_ns = dpctl::tensor::kernels::hypot; + +static binary_contig_impl_fn_ptr_t + hypot_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int hypot_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + hypot_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_hypot_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = hypot_fn_ns; + + // which input types are supported, and what is the type of 
the result + using fn_ns::HypotTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(hypot_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::HypotStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(hypot_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::HypotContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(hypot_contig_dispatch_table); +}; + +} // namespace impl + +// U37: ==== CBRT (x) +namespace impl +{ + +namespace cbrt_fn_ns = dpctl::tensor::kernels::cbrt; + +static unary_contig_impl_fn_ptr_t cbrt_contig_dispatch_vector[td_ns::num_types]; +static int cbrt_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + cbrt_strided_dispatch_vector[td_ns::num_types]; + +void populate_cbrt_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = cbrt_fn_ns; + + using fn_ns::CbrtContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(cbrt_contig_dispatch_vector); + + using fn_ns::CbrtStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(cbrt_strided_dispatch_vector); + + using fn_ns::CbrtTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(cbrt_output_typeid_vector); +} + +} // namespace impl + +// B24: ==== COPYSIGN (x1, x2) +namespace impl +{ +namespace copysign_fn_ns = dpctl::tensor::kernels::copysign; + +static binary_contig_impl_fn_ptr_t + copysign_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int copysign_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + copysign_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_copysign_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = copysign_fn_ns; + + // which input types are supported, and what is the type of the result + using 
fn_ns::CopysignTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(copysign_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::CopysignStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(copysign_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::CopysignContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(copysign_contig_dispatch_table); +}; + +} // namespace impl + +// U38: ==== EXP2 (x) +namespace impl +{ + +namespace exp2_fn_ns = dpctl::tensor::kernels::exp2; + +static unary_contig_impl_fn_ptr_t exp2_contig_dispatch_vector[td_ns::num_types]; +static int exp2_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + exp2_strided_dispatch_vector[td_ns::num_types]; + +void populate_exp2_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = exp2_fn_ns; + + using fn_ns::Exp2ContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(exp2_contig_dispatch_vector); + + using fn_ns::Exp2StridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(exp2_strided_dispatch_vector); + + using fn_ns::Exp2TypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(exp2_output_typeid_vector); +} + +} // namespace impl + +// ========================================================================================== +// // + +namespace py = pybind11; + +void init_elementwise_functions4(py::module_ m) +{ + + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + + // U09: ==== CEIL (x) + { + impl::populate_ceil_dispatch_vectors(); + using impl::ceil_contig_dispatch_vector; + using impl::ceil_output_typeid_vector; + using impl::ceil_strided_dispatch_vector; + + auto ceil_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return 
py_unary_ufunc( + src, dst, exec_q, depends, ceil_output_typeid_vector, + ceil_contig_dispatch_vector, ceil_strided_dispatch_vector); + }; + m.def("_ceil", ceil_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto ceil_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, ceil_output_typeid_vector); + }; + m.def("_ceil_result_type", ceil_result_type_pyapi); + } + + // U10: ==== CONJ (x) + { + impl::populate_conj_dispatch_vectors(); + using impl::conj_contig_dispatch_vector; + using impl::conj_output_typeid_vector; + using impl::conj_strided_dispatch_vector; + + auto conj_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, conj_output_typeid_vector, + conj_contig_dispatch_vector, conj_strided_dispatch_vector); + }; + m.def("_conj", conj_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto conj_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, conj_output_typeid_vector); + }; + m.def("_conj_result_type", conj_result_type_pyapi); + } + + // U11: ==== COS (x) + { + impl::populate_cos_dispatch_vectors(); + using impl::cos_contig_dispatch_vector; + using impl::cos_output_typeid_vector; + using impl::cos_strided_dispatch_vector; + + auto cos_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, cos_output_typeid_vector, + cos_contig_dispatch_vector, cos_strided_dispatch_vector); + }; + m.def("_cos", cos_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto cos_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, cos_output_typeid_vector); + }; + 
m.def("_cos_result_type", cos_result_type_pyapi); + } + + // U12: ==== COSH (x) + { + impl::populate_cosh_dispatch_vectors(); + using impl::cosh_contig_dispatch_vector; + using impl::cosh_output_typeid_vector; + using impl::cosh_strided_dispatch_vector; + + auto cosh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, cosh_output_typeid_vector, + cosh_contig_dispatch_vector, cosh_strided_dispatch_vector); + }; + m.def("_cosh", cosh_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto cosh_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, cosh_output_typeid_vector); + }; + m.def("_cosh_result_type", cosh_result_type_pyapi); + } + + // B08: ==== DIVIDE (x1, x2) + { + impl::populate_true_divide_dispatch_tables(); + using impl::true_divide_contig_dispatch_table; + using impl:: + true_divide_contig_matrix_contig_row_broadcast_dispatch_table; + using impl:: + true_divide_contig_row_contig_matrix_broadcast_dispatch_table; + using impl::true_divide_output_id_table; + using impl::true_divide_strided_dispatch_table; + + auto divide_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, true_divide_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + true_divide_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + true_divide_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + true_divide_contig_matrix_contig_row_broadcast_dispatch_table, + // function pointers 
to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + true_divide_contig_row_contig_matrix_broadcast_dispatch_table); + }; + auto divide_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + true_divide_output_id_table); + }; + m.def("_divide", divide_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_divide_result_type", divide_result_type_pyapi, ""); + + using impl::true_divide_inplace_contig_dispatch_table; + using impl::true_divide_inplace_output_id_table; + using impl::true_divide_inplace_row_matrix_dispatch_table; + using impl::true_divide_inplace_strided_dispatch_table; + + auto divide_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, + true_divide_inplace_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + true_divide_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + true_divide_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + true_divide_inplace_row_matrix_dispatch_table); + }; + m.def("_divide_inplace", divide_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } + + // B09: ==== EQUAL (x1, x2) + { + impl::populate_equal_dispatch_tables(); + using impl::equal_contig_dispatch_table; + using impl::equal_output_id_table; + using impl::equal_strided_dispatch_table; + + auto equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray 
&src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, equal_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + equal_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + equal_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + equal_output_id_table); + }; + m.def("_equal", equal_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_equal_result_type", equal_result_type_pyapi, ""); + } + + // U13: ==== EXP (x) + { + impl::populate_exp_dispatch_vectors(); + using impl::exp_contig_dispatch_vector; + using impl::exp_output_typeid_vector; + using impl::exp_strided_dispatch_vector; + + auto exp_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, exp_output_typeid_vector, + exp_contig_dispatch_vector, exp_strided_dispatch_vector); + }; + m.def("_exp", exp_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto exp_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, exp_output_typeid_vector); + }; + m.def("_exp_result_type", 
exp_result_type_pyapi); + } + + // U14: ==== EXPM1 (x) + { + impl::populate_expm1_dispatch_vectors(); + using impl::expm1_contig_dispatch_vector; + using impl::expm1_output_typeid_vector; + using impl::expm1_strided_dispatch_vector; + + auto expm1_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, expm1_output_typeid_vector, + expm1_contig_dispatch_vector, expm1_strided_dispatch_vector); + }; + m.def("_expm1", expm1_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto expm1_result_type_pyapi = [&](const py::dtype dtype) { + return py_unary_ufunc_result_type(dtype, + expm1_output_typeid_vector); + }; + m.def("_expm1_result_type", expm1_result_type_pyapi); + } + + // U15: ==== FLOOR (x) + { + impl::populate_floor_dispatch_vectors(); + using impl::floor_contig_dispatch_vector; + using impl::floor_output_typeid_vector; + using impl::floor_strided_dispatch_vector; + + auto floor_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, floor_output_typeid_vector, + floor_contig_dispatch_vector, floor_strided_dispatch_vector); + }; + m.def("_floor", floor_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto floor_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + floor_output_typeid_vector); + }; + m.def("_floor_result_type", floor_result_type_pyapi); + } + + // B10: ==== FLOOR_DIVIDE (x1, x2) + { + impl::populate_floor_divide_dispatch_tables(); + using impl::floor_divide_contig_dispatch_table; + using impl::floor_divide_output_id_table; + using impl::floor_divide_strided_dispatch_table; + + auto floor_divide_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, 
+ const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, floor_divide_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + floor_divide_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + floor_divide_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto floor_divide_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + floor_divide_output_id_table); + }; + m.def("_floor_divide", floor_divide_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_floor_divide_result_type", floor_divide_result_type_pyapi, ""); + + using impl::floor_divide_inplace_contig_dispatch_table; + using impl::floor_divide_inplace_strided_dispatch_table; + + auto floor_divide_inplace_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, floor_divide_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + floor_divide_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + floor_divide_inplace_strided_dispatch_table, + // function pointers to 
handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_floor_divide_inplace", floor_divide_inplace_pyapi, "", + py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } + + // B11: ==== GREATER (x1, x2) + { + impl::populate_greater_dispatch_tables(); + using impl::greater_contig_dispatch_table; + using impl::greater_output_id_table; + using impl::greater_strided_dispatch_table; + + auto greater_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, greater_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + greater_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + greater_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto greater_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + greater_output_id_table); + }; + m.def("_greater", greater_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_greater_result_type", greater_result_type_pyapi, ""); + } + + // B12: ==== GREATER_EQUAL (x1, x2) + { + 
impl::populate_greater_equal_dispatch_tables(); + using impl::greater_equal_contig_dispatch_table; + using impl::greater_equal_output_id_table; + using impl::greater_equal_strided_dispatch_table; + + auto greater_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, greater_equal_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + greater_equal_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + greater_equal_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto greater_equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + greater_equal_output_id_table); + }; + m.def("_greater_equal", greater_equal_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_greater_equal_result_type", greater_equal_result_type_pyapi, + ""); + } + + // U16: ==== IMAG (x) + { + impl::populate_imag_dispatch_vectors(); + using impl::imag_contig_dispatch_vector; + using impl::imag_output_typeid_vector; + using impl::imag_strided_dispatch_vector; + + auto imag_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, 
imag_output_typeid_vector, + imag_contig_dispatch_vector, imag_strided_dispatch_vector); + }; + m.def("_imag", imag_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto imag_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, imag_output_typeid_vector); + }; + m.def("_imag_result_type", imag_result_type_pyapi); + } + + // U17: ==== ISFINITE (x) + { + impl::populate_isfinite_dispatch_vectors(); + + using impl::isfinite_contig_dispatch_vector; + using impl::isfinite_output_typeid_vector; + using impl::isfinite_strided_dispatch_vector; + auto isfinite_pyapi = + [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + isfinite_output_typeid_vector, + isfinite_contig_dispatch_vector, + isfinite_strided_dispatch_vector); + }; + auto isfinite_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + isfinite_output_typeid_vector); + }; + m.def("_isfinite", isfinite_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_isfinite_result_type", isfinite_result_type_pyapi, ""); + } + + // U18: ==== ISINF (x) + { + impl::populate_isinf_dispatch_vectors(); + + using impl::isinf_contig_dispatch_vector; + using impl::isinf_output_typeid_vector; + using impl::isinf_strided_dispatch_vector; + auto isinf_pyapi = [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, isinf_output_typeid_vector, + isinf_contig_dispatch_vector, isinf_strided_dispatch_vector); + }; + auto isinf_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + isinf_output_typeid_vector); + }; + m.def("_isinf", 
isinf_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_isinf_result_type", isinf_result_type_pyapi, ""); + } + + // U19: ==== ISNAN (x) + { + impl::populate_isnan_dispatch_vectors(); + + using impl::isnan_contig_dispatch_vector; + using impl::isnan_output_typeid_vector; + using impl::isnan_strided_dispatch_vector; + auto isnan_pyapi = [&](const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, isnan_output_typeid_vector, + isnan_contig_dispatch_vector, isnan_strided_dispatch_vector); + }; + auto isnan_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + isnan_output_typeid_vector); + }; + m.def("_isnan", isnan_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_isnan_result_type", isnan_result_type_pyapi, ""); + } + + // B24: ==== HYPOT (x1, x2) + { + impl::populate_hypot_dispatch_tables(); + using impl::hypot_contig_dispatch_table; + using impl::hypot_output_id_table; + using impl::hypot_strided_dispatch_table; + + auto hypot_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, hypot_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + hypot_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + hypot_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to 
handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto hypot_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + hypot_output_id_table); + }; + m.def("_hypot", hypot_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_hypot_result_type", hypot_result_type_pyapi, ""); + } + + // U37: ==== CBRT (x) + { + impl::populate_cbrt_dispatch_vectors(); + using impl::cbrt_contig_dispatch_vector; + using impl::cbrt_output_typeid_vector; + using impl::cbrt_strided_dispatch_vector; + + auto cbrt_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, cbrt_output_typeid_vector, + cbrt_contig_dispatch_vector, cbrt_strided_dispatch_vector); + }; + m.def("_cbrt", cbrt_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto cbrt_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, cbrt_output_typeid_vector); + }; + m.def("_cbrt_result_type", cbrt_result_type_pyapi); + } + + // B25: ==== COPYSIGN (x1, x2) + { + impl::populate_copysign_dispatch_tables(); + using impl::copysign_contig_dispatch_table; + using impl::copysign_output_id_table; + using impl::copysign_strided_dispatch_table; + + auto copysign_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, + const dpctl::tensor::usm_ndarray &src2, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = + {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, copysign_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + 
copysign_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + copysign_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto copysign_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + copysign_output_id_table); + }; + m.def("_copysign", copysign_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_copysign_result_type", copysign_result_type_pyapi, ""); + } + + // U38: ==== EXP2 (x) + { + impl::populate_exp2_dispatch_vectors(); + using impl::exp2_contig_dispatch_vector; + using impl::exp2_output_typeid_vector; + using impl::exp2_strided_dispatch_vector; + + auto exp2_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, exp2_output_typeid_vector, + exp2_contig_dispatch_vector, exp2_strided_dispatch_vector); + }; + m.def("_exp2", exp2_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto exp2_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, exp2_output_typeid_vector); + }; + m.def("_exp2_result_type", exp2_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions4.hpp b/dpctl/tensor/libtensor/source/elementwise_functions4.hpp new file mode 100644 index 
0000000000..920221d644 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions4.hpp @@ -0,0 +1,40 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_elementwise_functions4(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp index ad17a71c9a..3ffd6216b9 100644 --- a/dpctl/tensor/libtensor/source/tensor_py.cpp +++ b/dpctl/tensor/libtensor/source/tensor_py.cpp @@ -44,6 +44,8 @@ #include "device_support_queries.hpp" #include "elementwise_functions1.hpp" #include "elementwise_functions2.hpp" +#include "elementwise_functions3.hpp" +#include "elementwise_functions4.hpp" #include "eye_ctor.hpp" #include "full_ctor.hpp" #include "integer_advanced_indexing.hpp" @@ -444,6 +446,8 @@ PYBIND11_MODULE(_tensor_impl, m) dpctl::tensor::py_internal::init_elementwise_functions1(m); dpctl::tensor::py_internal::init_elementwise_functions2(m); + dpctl::tensor::py_internal::init_elementwise_functions3(m); + dpctl::tensor::py_internal::init_elementwise_functions4(m); dpctl::tensor::py_internal::init_boolean_reduction_functions(m); dpctl::tensor::py_internal::init_reduction_functions(m); } From cd74a60b6cceed602f48d98db42223951f7e4547 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 23 Oct 2023 11:40:21 -0700 Subject: [PATCH 11/19] Fix more missing includes --- .../include/kernels/elementwise_functions/bitwise_and.hpp | 1 + .../include/kernels/elementwise_functions/bitwise_invert.hpp | 2 ++ .../kernels/elementwise_functions/bitwise_left_shift.hpp | 1 + .../include/kernels/elementwise_functions/bitwise_or.hpp | 1 + .../kernels/elementwise_functions/bitwise_right_shift.hpp | 1 + .../include/kernels/elementwise_functions/bitwise_xor.hpp | 1 + .../include/kernels/elementwise_functions/remainder.hpp | 1 + .../include/kernels/elementwise_functions/subtract.hpp | 1 + 
.../include/kernels/elementwise_functions/true_divide.hpp | 1 + 9 files changed, 10 insertions(+) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp index c928200e3c..d88d17d3e3 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_and.hpp @@ -33,6 +33,7 @@ #include "utils/type_utils.hpp" #include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" #include namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_invert.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_invert.hpp index 9ce56be966..ed4aeeb59e 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_invert.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_invert.hpp @@ -35,6 +35,8 @@ #include "utils/type_utils.hpp" #include +#include "kernels/elementwise_functions/common.hpp" + namespace dpctl { namespace tensor diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_left_shift.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_left_shift.hpp index 8fce912e68..5cfd6ca5e3 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_left_shift.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_left_shift.hpp @@ -34,6 +34,7 @@ #include "utils/type_utils.hpp" #include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" #include namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_or.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_or.hpp index 3984f9b94e..d5669d41b1 100644 --- 
a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_or.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_or.hpp @@ -33,6 +33,7 @@ #include "utils/type_utils.hpp" #include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" #include namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_right_shift.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_right_shift.hpp index 1607ced360..5a04165701 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_right_shift.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_right_shift.hpp @@ -34,6 +34,7 @@ #include "utils/type_utils.hpp" #include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" #include namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_xor.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_xor.hpp index decb2ddfd0..ec8192fd0f 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_xor.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/bitwise_xor.hpp @@ -33,6 +33,7 @@ #include "utils/type_utils.hpp" #include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" #include namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp index 2a821b4e85..051a1f9029 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp @@ -35,6 +35,7 @@ #include "utils/type_utils.hpp" #include "kernels/elementwise_functions/common.hpp" +#include 
"kernels/elementwise_functions/common_inplace.hpp" #include namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/subtract.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/subtract.hpp index 3eb8420933..e4ae857738 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/subtract.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/subtract.hpp @@ -34,6 +34,7 @@ #include "utils/type_utils.hpp" #include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" #include namespace dpctl diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp index 138f7a3f91..86fb0ca2e2 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp @@ -34,6 +34,7 @@ #include "utils/type_utils.hpp" #include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" #include namespace dpctl From dd78cf76add5089c8b6ca8deb6757d349b5549a4 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 23 Oct 2023 14:36:30 -0700 Subject: [PATCH 12/19] Splits elementwise functions into separate source files --- dpctl/tensor/CMakeLists.txt | 90 +- .../source/elementwise_functions/abs.cpp | 119 ++ .../abs.hpp} | 4 +- .../source/elementwise_functions/acos.cpp | 119 ++ .../source/elementwise_functions/acos.hpp | 42 + .../source/elementwise_functions/acosh.cpp | 121 ++ .../source/elementwise_functions/acosh.hpp | 42 + .../source/elementwise_functions/add.cpp | 229 +++ .../add.hpp} | 4 +- .../source/elementwise_functions/asin.cpp | 119 ++ .../source/elementwise_functions/asin.hpp | 42 + .../source/elementwise_functions/asinh.cpp | 121 ++ .../source/elementwise_functions/asinh.hpp | 42 + 
.../source/elementwise_functions/atan.cpp | 119 ++ .../source/elementwise_functions/atan.hpp | 42 + .../source/elementwise_functions/atan2.cpp | 140 ++ .../source/elementwise_functions/atan2.hpp | 42 + .../source/elementwise_functions/atanh.cpp | 121 ++ .../source/elementwise_functions/atanh.hpp | 42 + .../elementwise_functions/bitwise_and.cpp | 190 ++ .../elementwise_functions/bitwise_and.hpp | 42 + .../elementwise_functions/bitwise_invert.cpp | 123 ++ .../elementwise_functions/bitwise_invert.hpp | 42 + .../bitwise_left_shift.cpp | 200 +++ .../bitwise_left_shift.hpp | 42 + .../elementwise_functions/bitwise_or.cpp | 190 ++ .../elementwise_functions/bitwise_or.hpp | 42 + .../bitwise_right_shift.cpp | 201 +++ .../bitwise_right_shift.hpp | 42 + .../elementwise_functions/bitwise_xor.cpp | 190 ++ .../elementwise_functions/bitwise_xor.hpp | 42 + .../source/elementwise_functions/cbrt.cpp | 119 ++ .../source/elementwise_functions/cbrt.hpp | 42 + .../source/elementwise_functions/ceil.cpp | 119 ++ .../source/elementwise_functions/ceil.hpp | 44 + .../source/elementwise_functions/conj.cpp | 119 ++ .../source/elementwise_functions/conj.hpp | 42 + .../source/elementwise_functions/copysign.cpp | 140 ++ .../source/elementwise_functions/copysign.hpp | 42 + .../source/elementwise_functions/cos.cpp | 119 ++ .../cos.hpp} | 4 +- .../source/elementwise_functions/cosh.cpp | 119 ++ .../source/elementwise_functions/cosh.hpp | 42 + .../elementwise_common.cpp | 248 +++ .../elementwise_common.hpp | 42 + .../elementwise_functions.hpp | 1 + .../source/elementwise_functions/equal.cpp | 140 ++ .../source/elementwise_functions/equal.hpp | 42 + .../source/elementwise_functions/exp.cpp | 119 ++ .../exp.hpp} | 4 +- .../source/elementwise_functions/exp2.cpp | 119 ++ .../source/elementwise_functions/exp2.hpp | 42 + .../source/elementwise_functions/expm1.cpp | 121 ++ .../source/elementwise_functions/expm1.hpp | 42 + .../source/elementwise_functions/floor.cpp | 121 ++ 
.../source/elementwise_functions/floor.hpp | 42 + .../elementwise_functions/floor_divide.cpp | 190 ++ .../elementwise_functions/floor_divide.hpp | 42 + .../source/elementwise_functions/greater.cpp | 140 ++ .../source/elementwise_functions/greater.hpp | 42 + .../elementwise_functions/greater_equal.cpp | 141 ++ .../elementwise_functions/greater_equal.hpp | 42 + .../source/elementwise_functions/hypot.cpp | 140 ++ .../source/elementwise_functions/hypot.hpp | 42 + .../source/elementwise_functions/imag.cpp | 119 ++ .../source/elementwise_functions/imag.hpp | 42 + .../source/elementwise_functions/isfinite.cpp | 122 ++ .../source/elementwise_functions/isfinite.hpp | 42 + .../source/elementwise_functions/isinf.cpp | 121 ++ .../source/elementwise_functions/isinf.hpp | 42 + .../source/elementwise_functions/isnan.cpp | 121 ++ .../source/elementwise_functions/isnan.hpp | 42 + .../source/elementwise_functions/less.cpp | 140 ++ .../source/elementwise_functions/less.hpp | 42 + .../elementwise_functions/less_equal.cpp | 140 ++ .../elementwise_functions/less_equal.hpp | 42 + .../source/elementwise_functions/log.cpp | 119 ++ .../source/elementwise_functions/log.hpp | 42 + .../source/elementwise_functions/log10.cpp | 121 ++ .../source/elementwise_functions/log10.hpp | 42 + .../source/elementwise_functions/log1p.cpp | 121 ++ .../source/elementwise_functions/log1p.hpp | 42 + .../source/elementwise_functions/log2.cpp | 119 ++ .../source/elementwise_functions/log2.hpp | 42 + .../elementwise_functions/logaddexp.cpp | 140 ++ .../elementwise_functions/logaddexp.hpp | 42 + .../elementwise_functions/logical_and.cpp | 140 ++ .../elementwise_functions/logical_and.hpp | 42 + .../elementwise_functions/logical_not.cpp | 123 ++ .../elementwise_functions/logical_not.hpp | 42 + .../elementwise_functions/logical_or.cpp | 140 ++ .../elementwise_functions/logical_or.hpp | 42 + .../elementwise_functions/logical_xor.cpp | 140 ++ .../elementwise_functions/logical_xor.hpp | 42 + 
.../source/elementwise_functions/maximum.cpp | 140 ++ .../source/elementwise_functions/maximum.hpp | 42 + .../source/elementwise_functions/minimum.cpp | 140 ++ .../source/elementwise_functions/minimum.hpp | 42 + .../source/elementwise_functions/multiply.cpp | 230 +++ .../source/elementwise_functions/multiply.hpp | 42 + .../source/elementwise_functions/negative.cpp | 122 ++ .../source/elementwise_functions/negative.hpp | 42 + .../elementwise_functions/not_equal.cpp | 140 ++ .../elementwise_functions/not_equal.hpp | 42 + .../source/elementwise_functions/positive.cpp | 122 ++ .../source/elementwise_functions/positive.hpp | 42 + .../source/elementwise_functions/pow.cpp | 189 ++ .../source/elementwise_functions/pow.hpp | 42 + .../source/elementwise_functions/proj.cpp | 119 ++ .../source/elementwise_functions/proj.hpp | 42 + .../source/elementwise_functions/real.cpp | 119 ++ .../source/elementwise_functions/real.hpp | 42 + .../elementwise_functions/remainder.cpp | 190 ++ .../elementwise_functions/remainder.hpp | 42 + .../source/elementwise_functions/round.cpp | 121 ++ .../source/elementwise_functions/round.hpp | 42 + .../source/elementwise_functions/rsqrt.cpp | 121 ++ .../source/elementwise_functions/rsqrt.hpp | 42 + .../source/elementwise_functions/sign.cpp | 119 ++ .../source/elementwise_functions/sign.hpp | 42 + .../source/elementwise_functions/signbit.cpp | 122 ++ .../source/elementwise_functions/signbit.hpp | 42 + .../source/elementwise_functions/sin.cpp | 119 ++ .../source/elementwise_functions/sin.hpp | 42 + .../source/elementwise_functions/sinh.cpp | 119 ++ .../source/elementwise_functions/sinh.hpp | 42 + .../source/elementwise_functions/sqrt.cpp | 119 ++ .../source/elementwise_functions/sqrt.hpp | 42 + .../source/elementwise_functions/square.cpp | 121 ++ .../source/elementwise_functions/square.hpp | 42 + .../source/elementwise_functions/subtract.cpp | 229 +++ .../source/elementwise_functions/subtract.hpp | 42 + .../source/elementwise_functions/tan.cpp | 119 ++ 
.../source/elementwise_functions/tan.hpp | 42 + .../source/elementwise_functions/tanh.cpp | 119 ++ .../source/elementwise_functions/tanh.hpp | 42 + .../elementwise_functions/true_divide.cpp | 241 +++ .../elementwise_functions/true_divide.hpp | 42 + .../source/elementwise_functions/trunc.cpp | 121 ++ .../source/elementwise_functions/trunc.hpp | 42 + .../source/elementwise_functions1.cpp | 1242 ------------- .../source/elementwise_functions2.cpp | 1454 --------------- .../source/elementwise_functions3.cpp | 1407 --------------- .../source/elementwise_functions4.cpp | 1551 ----------------- dpctl/tensor/libtensor/source/tensor_py.cpp | 10 +- 145 files changed, 12570 insertions(+), 5676 deletions(-) create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/abs.cpp rename dpctl/tensor/libtensor/source/{elementwise_functions3.hpp => elementwise_functions/abs.hpp} (95%) create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/acos.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/acos.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/acosh.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/acosh.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/add.cpp rename dpctl/tensor/libtensor/source/{elementwise_functions1.hpp => elementwise_functions/add.hpp} (95%) create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/asin.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/asin.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/asinh.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/asinh.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/atan.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/atan.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/atan2.cpp create mode 100644 
dpctl/tensor/libtensor/source/elementwise_functions/atan2.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/atanh.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/atanh.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/bitwise_and.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/bitwise_and.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/bitwise_invert.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/bitwise_invert.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/bitwise_left_shift.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/bitwise_left_shift.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/bitwise_or.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/bitwise_or.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/bitwise_right_shift.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/bitwise_right_shift.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/bitwise_xor.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/bitwise_xor.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/cbrt.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/cbrt.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/ceil.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/ceil.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/conj.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/conj.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/copysign.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/copysign.hpp create mode 100644 
dpctl/tensor/libtensor/source/elementwise_functions/cos.cpp rename dpctl/tensor/libtensor/source/{elementwise_functions4.hpp => elementwise_functions/cos.hpp} (95%) create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/cosh.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/cosh.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.hpp rename dpctl/tensor/libtensor/source/{ => elementwise_functions}/elementwise_functions.hpp (99%) create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/equal.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/equal.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/exp.cpp rename dpctl/tensor/libtensor/source/{elementwise_functions2.hpp => elementwise_functions/exp.hpp} (95%) create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/exp2.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/exp2.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/expm1.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/expm1.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/floor.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/floor.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/floor_divide.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/floor_divide.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/greater.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/greater.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/greater_equal.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/greater_equal.hpp create mode 100644 
dpctl/tensor/libtensor/source/elementwise_functions/hypot.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/hypot.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/imag.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/imag.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/isfinite.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/isfinite.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/isinf.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/isinf.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/isnan.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/isnan.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/less.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/less.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/less_equal.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/less_equal.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/log.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/log.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/log10.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/log10.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/log1p.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/log1p.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/log2.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/log2.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/logaddexp.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/logaddexp.hpp create mode 100644 
dpctl/tensor/libtensor/source/elementwise_functions/logical_and.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/logical_and.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/logical_not.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/logical_not.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/logical_or.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/logical_or.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/logical_xor.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/logical_xor.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/maximum.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/maximum.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/minimum.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/minimum.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/multiply.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/multiply.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/negative.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/negative.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/not_equal.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/not_equal.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/positive.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/positive.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/pow.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/pow.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/proj.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/proj.hpp 
create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/real.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/real.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/remainder.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/remainder.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/round.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/round.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/rsqrt.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/rsqrt.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/sign.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/sign.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/signbit.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/signbit.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/sin.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/sin.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/sinh.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/sinh.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/sqrt.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/sqrt.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/square.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/square.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/subtract.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/subtract.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/tan.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/tan.hpp create mode 100644 
dpctl/tensor/libtensor/source/elementwise_functions/tanh.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/tanh.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/true_divide.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/true_divide.hpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/trunc.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/trunc.hpp delete mode 100644 dpctl/tensor/libtensor/source/elementwise_functions1.cpp delete mode 100644 dpctl/tensor/libtensor/source/elementwise_functions2.cpp delete mode 100644 dpctl/tensor/libtensor/source/elementwise_functions3.cpp delete mode 100644 dpctl/tensor/libtensor/source/elementwise_functions4.cpp diff --git a/dpctl/tensor/CMakeLists.txt b/dpctl/tensor/CMakeLists.txt index fa23fd237e..88f78525d4 100644 --- a/dpctl/tensor/CMakeLists.txt +++ b/dpctl/tensor/CMakeLists.txt @@ -30,6 +30,77 @@ if(WIN32) endif() endif() +set(_elementwise_sources + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/elementwise_common.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/abs.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/acos.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/acosh.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/add.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/asin.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/asinh.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/atan.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/atan2.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/atanh.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/bitwise_and.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/bitwise_invert.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/bitwise_left_shift.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/bitwise_or.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/bitwise_right_shift.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/bitwise_xor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/cbrt.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/ceil.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/conj.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/copysign.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/cos.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/cosh.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/equal.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/exp.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/exp2.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/expm1.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/floor_divide.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/floor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/greater_equal.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/greater.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/hypot.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/imag.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/isfinite.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/isinf.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/isnan.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/less_equal.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/less.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/log.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/log1p.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/log2.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/log10.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logaddexp.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_and.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_not.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_or.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_xor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/maximum.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/minimum.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/multiply.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/negative.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/not_equal.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/positive.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/pow.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/proj.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/real.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/remainder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/round.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/rsqrt.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sign.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/signbit.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sin.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sinh.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sqrt.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/square.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/subtract.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/tan.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/tanh.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/true_divide.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/trunc.cpp +) set(_tensor_impl_sources ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/tensor_py.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/accumulators.cpp @@ -47,13 +118,12 @@ set(_tensor_impl_sources ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/where.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/boolean_reductions.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/device_support_queries.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions1.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions2.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions3.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions4.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/repeat.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/reduction_over_axis.cpp ) +list(APPEND _tensor_impl_sources + ${_elementwise_sources} +) set(python_module_name _tensor_impl) pybind11_add_module(${python_module_name} MODULE ${_tensor_impl_sources}) @@ -66,11 +136,10 @@ endif() set(_no_fast_math_sources ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/full_ctor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/linear_sequences.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions1.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions2.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions3.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions4.cpp -${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/reduction_over_axis.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/reduction_over_axis.cpp +) +list(APPEND _no_fast_math_sources + ${_elementwise_sources} ) foreach(_src_fn ${_no_fast_math_sources}) get_source_file_property(_cmpl_options_prop ${_src_fn} COMPILE_OPTIONS) @@ -82,7 +151,8 @@ foreach(_src_fn ${_no_fast_math_sources}) endforeach() if (UNIX) set_source_files_properties( - ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/abs.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sqrt.cpp PROPERTIES COMPILE_DEFINITIONS "USE_STD_ABS_FOR_COMPLEX_TYPES;USE_STD_SQRT_FOR_COMPLEX_TYPES") endif() target_compile_options(${python_module_name} PRIVATE -fno-sycl-id-queries-fit-in-int) diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/abs.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/abs.cpp new file mode 100644 index 0000000000..e359ace261 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/abs.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "abs.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/abs.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U01: ==== ABS (x) +namespace impl +{ + +namespace abs_fn_ns = dpctl::tensor::kernels::abs; + +static unary_contig_impl_fn_ptr_t abs_contig_dispatch_vector[td_ns::num_types]; +static int abs_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + abs_strided_dispatch_vector[td_ns::num_types]; + +void populate_abs_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = abs_fn_ns; + + using fn_ns::AbsContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(abs_contig_dispatch_vector); + + using fn_ns::AbsStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(abs_strided_dispatch_vector); + + using fn_ns::AbsTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(abs_output_typeid_vector); +}; + +} // namespace impl + +void init_abs(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + 
impl::populate_abs_dispatch_vectors(); + using impl::abs_contig_dispatch_vector; + using impl::abs_output_typeid_vector; + using impl::abs_strided_dispatch_vector; + + auto abs_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, abs_output_typeid_vector, + abs_contig_dispatch_vector, abs_strided_dispatch_vector); + }; + m.def("_abs", abs_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto abs_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, abs_output_typeid_vector); + }; + m.def("_abs_result_type", abs_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions3.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/abs.hpp similarity index 95% rename from dpctl/tensor/libtensor/source/elementwise_functions3.hpp rename to dpctl/tensor/libtensor/source/elementwise_functions/abs.hpp index c5f7feab4f..d09eafc6bd 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions3.hpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/abs.hpp @@ -26,6 +26,8 @@ #pragma once #include +namespace py = pybind11; + namespace dpctl { namespace tensor @@ -33,7 +35,7 @@ namespace tensor namespace py_internal { -extern void init_elementwise_functions3(py::module_ m); +extern void init_abs(py::module_ m); } // namespace py_internal } // namespace tensor diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/acos.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/acos.cpp new file mode 100644 index 0000000000..da4827be05 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/acos.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// 
Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "acos.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/acos.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U02: ==== ACOS (x) +namespace impl +{ + +namespace acos_fn_ns = dpctl::tensor::kernels::acos; + +static unary_contig_impl_fn_ptr_t acos_contig_dispatch_vector[td_ns::num_types]; +static int acos_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + acos_strided_dispatch_vector[td_ns::num_types]; + +void populate_acos_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = acos_fn_ns; + + using fn_ns::AcosContigFactory; + 
DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(acos_contig_dispatch_vector); + + using fn_ns::AcosStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(acos_strided_dispatch_vector); + + using fn_ns::AcosTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(acos_output_typeid_vector); +}; + +} // namespace impl + +void init_acos(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_acos_dispatch_vectors(); + using impl::acos_contig_dispatch_vector; + using impl::acos_output_typeid_vector; + using impl::acos_strided_dispatch_vector; + + auto acos_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, acos_output_typeid_vector, + acos_contig_dispatch_vector, acos_strided_dispatch_vector); + }; + m.def("_acos", acos_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto acos_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, acos_output_typeid_vector); + }; + m.def("_acos_result_type", acos_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/acos.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/acos.hpp new file mode 100644 index 0000000000..3a43d4087c --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/acos.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_acos(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/acosh.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/acosh.cpp new file mode 100644 index 0000000000..0ca029e604 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/acosh.cpp @@ -0,0 +1,121 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "acosh.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/acosh.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U03: ==== ACOSH (x) +namespace impl +{ + +namespace acosh_fn_ns = dpctl::tensor::kernels::acosh; + +static unary_contig_impl_fn_ptr_t + acosh_contig_dispatch_vector[td_ns::num_types]; +static int acosh_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + acosh_strided_dispatch_vector[td_ns::num_types]; + +void populate_acosh_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = acosh_fn_ns; + + using fn_ns::AcoshContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(acosh_contig_dispatch_vector); + + using fn_ns::AcoshStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(acosh_strided_dispatch_vector); + + using fn_ns::AcoshTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(acosh_output_typeid_vector); +}; + +} // namespace impl + +void init_acosh(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_acosh_dispatch_vectors(); + using impl::acosh_contig_dispatch_vector; + using 
impl::acosh_output_typeid_vector; + using impl::acosh_strided_dispatch_vector; + + auto acosh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, acosh_output_typeid_vector, + acosh_contig_dispatch_vector, acosh_strided_dispatch_vector); + }; + m.def("_acosh", acosh_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto acosh_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + acosh_output_typeid_vector); + }; + m.def("_acosh_result_type", acosh_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/acosh.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/acosh.hpp new file mode 100644 index 0000000000..dd13ba886c --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/acosh.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_acosh(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/add.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/add.cpp new file mode 100644 index 0000000000..bd0818385f --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/add.cpp @@ -0,0 +1,229 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "add.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/add.hpp" +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B01: ===== ADD (x1, x2) +namespace impl +{ + +namespace add_fn_ns = dpctl::tensor::kernels::add; + +static binary_contig_impl_fn_ptr_t add_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static int add_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + add_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +// add(matrix, row) +static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t + add_contig_matrix_contig_row_broadcast_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +// add(row, matrix) +static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t + add_contig_row_contig_matrix_broadcast_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + add_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + 
add_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t + add_inplace_row_matrix_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_add_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = add_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::AddTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(add_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::AddStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(add_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::AddContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(add_contig_dispatch_table); + + // function pointers for operation on contiguous matrix, contiguous row + // with contiguous matrix output + using fn_ns::AddContigMatrixContigRowBroadcastFactory; + DispatchTableBuilder< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, + AddContigMatrixContigRowBroadcastFactory, num_types> + dtb4; + dtb4.populate_dispatch_table( + add_contig_matrix_contig_row_broadcast_dispatch_table); + + // function pointers for operation on contiguous row, contiguous matrix + // with contiguous matrix output + using fn_ns::AddContigRowContigMatrixBroadcastFactory; + DispatchTableBuilder< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, + AddContigRowContigMatrixBroadcastFactory, num_types> + dtb5; + dtb5.populate_dispatch_table( + add_contig_row_contig_matrix_broadcast_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::AddInplaceStridedFactory; + DispatchTableBuilder + dtb6; + dtb6.populate_dispatch_table(add_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using 
fn_ns::AddInplaceContigFactory; + DispatchTableBuilder + dtb7; + dtb7.populate_dispatch_table(add_inplace_contig_dispatch_table); + + // function pointers for inplace operation on contiguous matrix + // and contiguous row + using fn_ns::AddInplaceRowMatrixBroadcastFactory; + DispatchTableBuilder + dtb8; + dtb8.populate_dispatch_table(add_inplace_row_matrix_dispatch_table); +}; + +} // namespace impl + +void init_add(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_add_dispatch_tables(); + using impl::add_contig_dispatch_table; + using impl::add_contig_matrix_contig_row_broadcast_dispatch_table; + using impl::add_contig_row_contig_matrix_broadcast_dispatch_table; + using impl::add_output_id_table; + using impl::add_strided_dispatch_table; + + auto add_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, add_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + add_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + add_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + add_contig_matrix_contig_row_broadcast_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + add_contig_row_contig_matrix_broadcast_dispatch_table); + }; + auto add_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + add_output_id_table); + }; + m.def("_add", add_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_add_result_type", 
add_result_type_pyapi, ""); + + using impl::add_inplace_contig_dispatch_table; + using impl::add_inplace_row_matrix_dispatch_table; + using impl::add_inplace_strided_dispatch_table; + + auto add_inplace_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, add_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + add_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + add_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + add_inplace_row_matrix_dispatch_table); + }; + m.def("_add_inplace", add_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions1.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/add.hpp similarity index 95% rename from dpctl/tensor/libtensor/source/elementwise_functions1.hpp rename to dpctl/tensor/libtensor/source/elementwise_functions/add.hpp index 9bc3976abf..5f88bfaa04 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions1.hpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/add.hpp @@ -26,6 +26,8 @@ #pragma once #include +namespace py = pybind11; + namespace dpctl { namespace tensor @@ -33,7 +35,7 @@ namespace tensor namespace py_internal { -extern void init_elementwise_functions1(py::module_ m); +extern void init_add(py::module_ m); } // namespace py_internal } // namespace tensor diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/asin.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/asin.cpp new file mode 100644 index 
0000000000..eb253cb747 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/asin.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "asin.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/asin.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U04: ==== ASIN (x) +namespace impl +{ + +namespace asin_fn_ns = dpctl::tensor::kernels::asin; + +static unary_contig_impl_fn_ptr_t asin_contig_dispatch_vector[td_ns::num_types]; +static int asin_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + asin_strided_dispatch_vector[td_ns::num_types]; + +void populate_asin_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = asin_fn_ns; + + using fn_ns::AsinContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(asin_contig_dispatch_vector); + + using fn_ns::AsinStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(asin_strided_dispatch_vector); + + using fn_ns::AsinTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(asin_output_typeid_vector); +}; + +} // namespace impl + +void init_asin(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_asin_dispatch_vectors(); + using impl::asin_contig_dispatch_vector; + using impl::asin_output_typeid_vector; + using impl::asin_strided_dispatch_vector; + + auto asin_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, 
asin_output_typeid_vector, + asin_contig_dispatch_vector, asin_strided_dispatch_vector); + }; + m.def("_asin", asin_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto asin_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, asin_output_typeid_vector); + }; + m.def("_asin_result_type", asin_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/asin.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/asin.hpp new file mode 100644 index 0000000000..0beed1d19c --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/asin.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_asin(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/asinh.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/asinh.cpp new file mode 100644 index 0000000000..6c89e3f592 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/asinh.cpp @@ -0,0 +1,121 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "asinh.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/asinh.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U05: ==== ASINH (x) +namespace impl +{ + +namespace asinh_fn_ns = dpctl::tensor::kernels::asinh; + +static unary_contig_impl_fn_ptr_t + asinh_contig_dispatch_vector[td_ns::num_types]; +static int asinh_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + asinh_strided_dispatch_vector[td_ns::num_types]; + +/// Fills the three file-local asinh vectors declared above (contiguous +/// implementation pointers, strided implementation pointers and output type +/// ids, each of length td_ns::num_types) by calling +/// populate_dispatch_vector on one DispatchVectorBuilder per vector. +/// NOTE(review): DispatchVectorBuilder appears here without template +/// arguments and the imported *Factory names are otherwise unused; the +/// argument lists may be missing from this copy - confirm upstream. +void populate_asinh_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = asinh_fn_ns; + + using fn_ns::AsinhContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(asinh_contig_dispatch_vector); + + using fn_ns::AsinhStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(asinh_strided_dispatch_vector); + + using fn_ns::AsinhTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(asinh_output_typeid_vector); +}; + +} // namespace impl + +/// Populates the asinh dispatch vectors, then defines two functions on +/// module `m`: "_asinh" (arguments src, dst, sycl_queue, depends; depends +/// defaults to an empty list), which forwards to py_unary_ufunc together +/// with the three asinh vectors, and "_asinh_result_type" (one dtype +/// argument), which forwards to py_unary_ufunc_result_type with +/// asinh_output_typeid_vector. +void init_asinh(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_asinh_dispatch_vectors(); + using impl::asinh_contig_dispatch_vector; + using impl::asinh_output_typeid_vector; + using impl::asinh_strided_dispatch_vector; + + auto asinh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, 
exec_q, depends, asinh_output_typeid_vector, + asinh_contig_dispatch_vector, asinh_strided_dispatch_vector); + }; + m.def("_asinh", asinh_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto asinh_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + asinh_output_typeid_vector); + }; + m.def("_asinh_result_type", asinh_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/asinh.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/asinh.hpp new file mode 100644 index 0000000000..22cc37b2d8 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/asinh.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +/// Defines the "_asinh" and "_asinh_result_type" functions on module `m`. +extern void init_asinh(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/atan.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/atan.cpp new file mode 100644 index 0000000000..03d813813f --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/atan.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "atan.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/atan.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U06: ==== ATAN (x) +namespace impl +{ + +namespace atan_fn_ns = dpctl::tensor::kernels::atan; + +static unary_contig_impl_fn_ptr_t atan_contig_dispatch_vector[td_ns::num_types]; +static int atan_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + atan_strided_dispatch_vector[td_ns::num_types]; + +/// Fills the three file-local atan vectors declared above (contiguous +/// implementation pointers, strided implementation pointers and output type +/// ids, each of length td_ns::num_types) by calling +/// populate_dispatch_vector on one DispatchVectorBuilder per vector. +/// NOTE(review): DispatchVectorBuilder appears here without template +/// arguments and the imported *Factory names are otherwise unused; the +/// argument lists may be missing from this copy - confirm upstream. +void populate_atan_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = atan_fn_ns; + + using fn_ns::AtanContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(atan_contig_dispatch_vector); + + using fn_ns::AtanStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(atan_strided_dispatch_vector); + + using fn_ns::AtanTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(atan_output_typeid_vector); +}; + +} // namespace impl + +/// Populates the atan dispatch vectors, then defines two functions on +/// module `m`: "_atan" (arguments src, dst, sycl_queue, depends; depends +/// defaults to an empty list), which forwards to py_unary_ufunc together +/// with the three atan vectors, and "_atan_result_type" (one dtype +/// argument), which forwards to py_unary_ufunc_result_type with +/// atan_output_typeid_vector. +void init_atan(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_atan_dispatch_vectors(); + using impl::atan_contig_dispatch_vector; + using impl::atan_output_typeid_vector; + using impl::atan_strided_dispatch_vector; + + auto atan_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, 
atan_output_typeid_vector, + atan_contig_dispatch_vector, atan_strided_dispatch_vector); + }; + m.def("_atan", atan_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto atan_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, atan_output_typeid_vector); + }; + m.def("_atan_result_type", atan_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/atan.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/atan.hpp new file mode 100644 index 0000000000..86df06699c --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/atan.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +/// Defines the "_atan" and "_atan_result_type" functions on module `m`. +extern void init_atan(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/atan2.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/atan2.cpp new file mode 100644 index 0000000000..07a9ec17c3 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/atan2.cpp @@ -0,0 +1,140 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "atan2.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/atan2.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B02: ===== ATAN2 (x1, x2) +namespace impl +{ +namespace atan2_fn_ns = dpctl::tensor::kernels::atan2; + +static binary_contig_impl_fn_ptr_t + atan2_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int atan2_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + atan2_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +/// Fills the three file-local atan2 tables declared above (output type ids, +/// strided implementation pointers and contiguous implementation pointers, +/// each td_ns::num_types x td_ns::num_types) by calling +/// populate_dispatch_table on one DispatchTableBuilder per table. +/// NOTE(review): DispatchTableBuilder appears here without template +/// arguments and the imported *Factory names are otherwise unused; the +/// argument lists may be missing from this copy - confirm upstream. +void populate_atan2_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = atan2_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::Atan2TypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(atan2_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::Atan2StridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(atan2_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::Atan2ContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(atan2_contig_dispatch_table); +}; + +} // namespace impl + +/// Populates the atan2 dispatch tables, then defines two functions on +/// module `m`: "_atan2" (arguments src1, src2, dst, sycl_queue, depends; +/// depends defaults to an empty list), which forwards to py_binary_ufunc +/// with the three atan2 tables and a default-constructed td_ns::NullPtrTable +/// for each of the two broadcast slots, and "_atan2_result_type" (two dtype +/// arguments), which forwards to py_binary_ufunc_result_type with +/// atan2_output_id_table. +void init_atan2(py::module_ m) +{ + using arrayT = 
dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_atan2_dispatch_tables(); + using impl::atan2_contig_dispatch_table; + using impl::atan2_output_id_table; + using impl::atan2_strided_dispatch_table; + + auto atan2_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, atan2_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + atan2_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + atan2_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig row and + // c-contig matrix with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto atan2_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + atan2_output_id_table); + }; + m.def("_atan2", atan2_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_atan2_result_type", atan2_result_type_pyapi, ""); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/atan2.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/atan2.hpp new file mode 100644 index 0000000000..f369d12208 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/atan2.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// 
Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +/// Defines the "_atan2" and "_atan2_result_type" functions on module `m`. +extern void init_atan2(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/atanh.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/atanh.cpp new file mode 100644 index 0000000000..83b350b607 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/atanh.cpp @@ -0,0 +1,121 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "atanh.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/atanh.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U07: ==== ATANH (x) +namespace impl +{ + +namespace atanh_fn_ns = dpctl::tensor::kernels::atanh; + +static unary_contig_impl_fn_ptr_t + atanh_contig_dispatch_vector[td_ns::num_types]; +static int atanh_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + atanh_strided_dispatch_vector[td_ns::num_types]; + +/// Fills the three file-local atanh vectors declared above (contiguous +/// implementation pointers, strided implementation pointers and output type +/// ids, each of length td_ns::num_types) by calling +/// populate_dispatch_vector on one DispatchVectorBuilder per vector. +/// NOTE(review): DispatchVectorBuilder appears here without template +/// arguments and the imported *Factory names are otherwise unused; the +/// argument lists may be missing from this copy - confirm upstream. +void populate_atanh_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = atanh_fn_ns; + + using fn_ns::AtanhContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(atanh_contig_dispatch_vector); + + using fn_ns::AtanhStridedFactory; + DispatchVectorBuilder + dvb2; + 
dvb2.populate_dispatch_vector(atanh_strided_dispatch_vector); + + using fn_ns::AtanhTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(atanh_output_typeid_vector); +}; + +} // namespace impl + +/// Populates the atanh dispatch vectors, then defines two functions on +/// module `m`: "_atanh" (arguments src, dst, sycl_queue, depends; depends +/// defaults to an empty list), which forwards to py_unary_ufunc together +/// with the three atanh vectors, and "_atanh_result_type" (one dtype +/// argument), which forwards to py_unary_ufunc_result_type with +/// atanh_output_typeid_vector. +void init_atanh(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_atanh_dispatch_vectors(); + using impl::atanh_contig_dispatch_vector; + using impl::atanh_output_typeid_vector; + using impl::atanh_strided_dispatch_vector; + + auto atanh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, atanh_output_typeid_vector, + atanh_contig_dispatch_vector, atanh_strided_dispatch_vector); + }; + m.def("_atanh", atanh_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto atanh_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + atanh_output_typeid_vector); + }; + m.def("_atanh_result_type", atanh_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/atanh.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/atanh.hpp new file mode 100644 index 0000000000..ba2930d80e --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/atanh.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +/// Defines the "_atanh" and "_atanh_result_type" functions on module `m`. +extern void init_atanh(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_and.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_and.cpp new file mode 100644 index 0000000000..c125adb840 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_and.cpp @@ -0,0 +1,190 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "bitwise_and.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/bitwise_and.hpp" +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B03: ===== BITWISE_AND (x1, x2) +namespace impl +{ +namespace bitwise_and_fn_ns = dpctl::tensor::kernels::bitwise_and; + +static binary_contig_impl_fn_ptr_t + bitwise_and_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int bitwise_and_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + bitwise_and_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + bitwise_and_inplace_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static 
binary_inplace_strided_impl_fn_ptr_t + bitwise_and_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_bitwise_and_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = bitwise_and_fn_ns; + + // NOTE(review): every DispatchTableBuilder below appears without template + // arguments and the imported *Factory names are otherwise unused; the + // argument lists may be missing from this copy - confirm upstream. + + // which input types are supported, and what is the type of the result + using fn_ns::BitwiseAndTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(bitwise_and_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::BitwiseAndStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(bitwise_and_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::BitwiseAndContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(bitwise_and_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::BitwiseAndInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table(bitwise_and_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::BitwiseAndInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(bitwise_and_inplace_contig_dispatch_table); +}; + +} // namespace impl + +/// Populates the bitwise_and dispatch tables, then defines three functions +/// on module `m`: "_bitwise_and" (arguments src1, src2, dst, sycl_queue, +/// depends; depends defaults to an empty list), which forwards to +/// py_binary_ufunc; "_bitwise_and_result_type" (two dtype arguments), which +/// forwards to py_binary_ufunc_result_type with bitwise_and_output_id_table; +/// and "_bitwise_and_inplace" (keyword names lhs, rhs, sycl_queue, depends), +/// which forwards to py_binary_inplace_ufunc with the two in-place tables. +/// NOTE(review): the keywords "lhs" and "rhs" bind, in that order, to the +/// lambda parameters named `src` and `dst`; confirm which operand +/// py_binary_inplace_ufunc modifies. +void init_bitwise_and(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_bitwise_and_dispatch_tables(); + using impl::bitwise_and_contig_dispatch_table; + using impl::bitwise_and_output_id_table; + using impl::bitwise_and_strided_dispatch_table; + + auto bitwise_and_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, bitwise_and_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) 
+ bitwise_and_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + bitwise_and_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig row and + // c-contig matrix with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto bitwise_and_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + bitwise_and_output_id_table); + }; + m.def("_bitwise_and", bitwise_and_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_bitwise_and_result_type", bitwise_and_result_type_pyapi, ""); + + using impl::bitwise_and_inplace_contig_dispatch_table; + using impl::bitwise_and_inplace_strided_dispatch_table; + + auto bitwise_and_inplace_pyapi = + [&](const arrayT &src, const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, bitwise_and_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + bitwise_and_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + bitwise_and_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_bitwise_and_inplace", bitwise_and_inplace_pyapi, "", + py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = 
py::list()); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_and.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_and.hpp new file mode 100644 index 0000000000..682b337efd --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_and.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +/// Defines the "_bitwise_and", "_bitwise_and_result_type" and +/// "_bitwise_and_inplace" functions on module `m`. +extern void init_bitwise_and(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_invert.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_invert.cpp new file mode 100644 index 0000000000..445a123a87 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_invert.cpp @@ -0,0 +1,123 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "bitwise_invert.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/bitwise_invert.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U08: ===== BITWISE_INVERT (x) +namespace impl +{ + +namespace bitwise_invert_fn_ns = dpctl::tensor::kernels::bitwise_invert; + +static unary_contig_impl_fn_ptr_t + bitwise_invert_contig_dispatch_vector[td_ns::num_types]; +static int bitwise_invert_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + bitwise_invert_strided_dispatch_vector[td_ns::num_types]; + +/// Fills the three file-local bitwise_invert vectors declared above +/// (contiguous implementation pointers, strided implementation pointers and +/// output type ids, each of length td_ns::num_types) by calling +/// populate_dispatch_vector on one DispatchVectorBuilder per vector. +/// NOTE(review): DispatchVectorBuilder appears here without template +/// arguments and the imported *Factory names are otherwise unused; the +/// argument lists may be missing from this copy - confirm upstream. +void populate_bitwise_invert_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = bitwise_invert_fn_ns; + + using fn_ns::BitwiseInvertContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(bitwise_invert_contig_dispatch_vector); + + using fn_ns::BitwiseInvertStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(bitwise_invert_strided_dispatch_vector); + + using fn_ns::BitwiseInvertTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(bitwise_invert_output_typeid_vector); +}; + +} // namespace impl + +/// Populates the bitwise_invert dispatch vectors, then defines two +/// functions on module `m`: "_bitwise_invert" (arguments src, dst, +/// sycl_queue, depends; depends defaults to an empty list), which forwards +/// to py_unary_ufunc together with the three bitwise_invert vectors, and +/// "_bitwise_invert_result_type" (one dtype argument), which forwards to +/// py_unary_ufunc_result_type with bitwise_invert_output_typeid_vector. +void init_bitwise_invert(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_bitwise_invert_dispatch_vectors(); + using impl::bitwise_invert_contig_dispatch_vector; + using impl::bitwise_invert_output_typeid_vector; + using 
impl::bitwise_invert_strided_dispatch_vector; + + auto bitwise_invert_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + bitwise_invert_output_typeid_vector, + bitwise_invert_contig_dispatch_vector, + bitwise_invert_strided_dispatch_vector); + }; + m.def("_bitwise_invert", bitwise_invert_pyapi, "", py::arg("src"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + + auto bitwise_invert_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type( + dtype, bitwise_invert_output_typeid_vector); + }; + m.def("_bitwise_invert_result_type", bitwise_invert_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_invert.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_invert.hpp new file mode 100644 index 0000000000..5b5d8398dc --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_invert.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_bitwise_invert(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_left_shift.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_left_shift.cpp new file mode 100644 index 0000000000..5ba54c66c8 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_left_shift.cpp @@ -0,0 +1,200 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "bitwise_left_shift.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/bitwise_left_shift.hpp" +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B04: ===== BITWISE_LEFT_SHIFT (x1, x2) +namespace impl +{ +namespace bitwise_left_shift_fn_ns = dpctl::tensor::kernels::bitwise_left_shift; + +static binary_contig_impl_fn_ptr_t + bitwise_left_shift_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static int bitwise_left_shift_output_id_table[td_ns::num_types] + [td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + bitwise_left_shift_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + bitwise_left_shift_inplace_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + bitwise_left_shift_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_bitwise_left_shift_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = bitwise_left_shift_fn_ns; + + // which 
input types are supported, and what is the type of the result + using fn_ns::BitwiseLeftShiftTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(bitwise_left_shift_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::BitwiseLeftShiftStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(bitwise_left_shift_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::BitwiseLeftShiftContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(bitwise_left_shift_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::BitwiseLeftShiftInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table( + bitwise_left_shift_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::BitwiseLeftShiftInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table( + bitwise_left_shift_inplace_contig_dispatch_table); +}; + +} // namespace impl + +void init_bitwise_left_shift(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_bitwise_left_shift_dispatch_tables(); + using impl::bitwise_left_shift_contig_dispatch_table; + using impl::bitwise_left_shift_output_id_table; + using impl::bitwise_left_shift_strided_dispatch_table; + + auto bitwise_left_shift_pyapi = [&](const arrayT &src1, + const arrayT &src2, + const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, + bitwise_left_shift_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + bitwise_left_shift_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general 
case) + bitwise_left_shift_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto bitwise_left_shift_result_type_pyapi = + [&](const py::dtype &dtype1, const py::dtype &dtype2) { + return py_binary_ufunc_result_type( + dtype1, dtype2, bitwise_left_shift_output_id_table); + }; + m.def("_bitwise_left_shift", bitwise_left_shift_pyapi, "", + py::arg("src1"), py::arg("src2"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_bitwise_left_shift_result_type", + bitwise_left_shift_result_type_pyapi, ""); + + using impl::bitwise_left_shift_inplace_contig_dispatch_table; + using impl::bitwise_left_shift_inplace_strided_dispatch_table; + + auto bitwise_left_shift_inplace_pyapi = + [&](const arrayT &src, const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, + bitwise_left_shift_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + bitwise_left_shift_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + bitwise_left_shift_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_bitwise_left_shift_inplace", bitwise_left_shift_inplace_pyapi, + "", py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } 
+} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_left_shift.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_left_shift.hpp new file mode 100644 index 0000000000..9edcba43ab --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_left_shift.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_bitwise_left_shift(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_or.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_or.cpp new file mode 100644 index 0000000000..5de3bfe013 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_or.cpp @@ -0,0 +1,190 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "bitwise_or.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/bitwise_or.hpp" +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B05: ===== BITWISE_OR (x1, x2) +namespace impl +{ +namespace bitwise_or_fn_ns = dpctl::tensor::kernels::bitwise_or; + +static binary_contig_impl_fn_ptr_t + bitwise_or_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int bitwise_or_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + bitwise_or_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + bitwise_or_inplace_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + bitwise_or_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_bitwise_or_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = bitwise_or_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::BitwiseOrTypeMapFactory; + 
DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(bitwise_or_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::BitwiseOrStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(bitwise_or_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::BitwiseOrContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(bitwise_or_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::BitwiseOrInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table(bitwise_or_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::BitwiseOrInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(bitwise_or_inplace_contig_dispatch_table); +}; + +} // namespace impl + +void init_bitwise_or(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_bitwise_or_dispatch_tables(); + using impl::bitwise_or_contig_dispatch_table; + using impl::bitwise_or_output_id_table; + using impl::bitwise_or_strided_dispatch_table; + + auto bitwise_or_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, bitwise_or_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + bitwise_or_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + bitwise_or_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // 
function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto bitwise_or_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + bitwise_or_output_id_table); + }; + m.def("_bitwise_or", bitwise_or_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_bitwise_or_result_type", bitwise_or_result_type_pyapi, ""); + + using impl::bitwise_or_inplace_contig_dispatch_table; + using impl::bitwise_or_inplace_strided_dispatch_table; + + auto bitwise_or_inplace_pyapi = + [&](const arrayT &src, const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, bitwise_or_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + bitwise_or_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + bitwise_or_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_bitwise_or_inplace", bitwise_or_inplace_pyapi, "", + py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_or.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_or.hpp new file mode 100644 index 0000000000..7603ed8277 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_or.hpp @@ -0,0 +1,42 
@@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_bitwise_or(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_right_shift.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_right_shift.cpp new file mode 100644 index 0000000000..e1285f79ac --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_right_shift.cpp @@ -0,0 +1,201 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "bitwise_right_shift.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/bitwise_right_shift.hpp" +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B06: ===== BITWISE_RIGHT_SHIFT (x1, x2) +namespace impl +{ +namespace bitwise_right_shift_fn_ns = + dpctl::tensor::kernels::bitwise_right_shift; + +static binary_contig_impl_fn_ptr_t + 
bitwise_right_shift_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static int bitwise_right_shift_output_id_table[td_ns::num_types] + [td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + bitwise_right_shift_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + bitwise_right_shift_inplace_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + bitwise_right_shift_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_bitwise_right_shift_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = bitwise_right_shift_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::BitwiseRightShiftTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(bitwise_right_shift_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::BitwiseRightShiftStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(bitwise_right_shift_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::BitwiseRightShiftContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(bitwise_right_shift_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::BitwiseRightShiftInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table( + bitwise_right_shift_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::BitwiseRightShiftInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table( + bitwise_right_shift_inplace_contig_dispatch_table); +}; + +} // namespace impl + +void init_bitwise_right_shift(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using 
event_vecT = std::vector; + { + impl::populate_bitwise_right_shift_dispatch_tables(); + using impl::bitwise_right_shift_contig_dispatch_table; + using impl::bitwise_right_shift_output_id_table; + using impl::bitwise_right_shift_strided_dispatch_table; + + auto bitwise_right_shift_pyapi = [&](const arrayT &src1, + const arrayT &src2, + const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, + bitwise_right_shift_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + bitwise_right_shift_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + bitwise_right_shift_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto bitwise_right_shift_result_type_pyapi = + [&](const py::dtype &dtype1, const py::dtype &dtype2) { + return py_binary_ufunc_result_type( + dtype1, dtype2, bitwise_right_shift_output_id_table); + }; + m.def("_bitwise_right_shift", bitwise_right_shift_pyapi, "", + py::arg("src1"), py::arg("src2"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_bitwise_right_shift_result_type", + bitwise_right_shift_result_type_pyapi, ""); + + using impl::bitwise_right_shift_inplace_contig_dispatch_table; + using impl::bitwise_right_shift_inplace_strided_dispatch_table; + + auto bitwise_right_shift_inplace_pyapi = + [&](const arrayT &src, const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, 
exec_q, depends, + bitwise_right_shift_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + bitwise_right_shift_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + bitwise_right_shift_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_bitwise_right_shift_inplace", bitwise_right_shift_inplace_pyapi, + "", py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_right_shift.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_right_shift.hpp new file mode 100644 index 0000000000..5ce2bca4e7 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_right_shift.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_bitwise_right_shift(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_xor.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_xor.cpp new file mode 100644 index 0000000000..18e60900cc --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_xor.cpp @@ -0,0 +1,190 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "bitwise_xor.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/bitwise_xor.hpp" +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B07: ===== BITWISE_XOR (x1, x2) +namespace impl +{ +namespace bitwise_xor_fn_ns = dpctl::tensor::kernels::bitwise_xor; + +static binary_contig_impl_fn_ptr_t + bitwise_xor_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int bitwise_xor_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + bitwise_xor_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + bitwise_xor_inplace_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + bitwise_xor_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_bitwise_xor_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = bitwise_xor_fn_ns; + + // which input types are supported, and what is the type of the result + using 
fn_ns::BitwiseXorTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(bitwise_xor_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::BitwiseXorStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(bitwise_xor_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::BitwiseXorContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(bitwise_xor_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::BitwiseXorInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table(bitwise_xor_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::BitwiseXorInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(bitwise_xor_inplace_contig_dispatch_table); +}; + +} // namespace impl + +void init_bitwise_xor(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_bitwise_xor_dispatch_tables(); + using impl::bitwise_xor_contig_dispatch_table; + using impl::bitwise_xor_output_id_table; + using impl::bitwise_xor_strided_dispatch_table; + + auto bitwise_xor_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, bitwise_xor_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + bitwise_xor_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + bitwise_xor_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + 
binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto bitwise_xor_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + bitwise_xor_output_id_table); + }; + m.def("_bitwise_xor", bitwise_xor_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_bitwise_xor_result_type", bitwise_xor_result_type_pyapi, ""); + + using impl::bitwise_xor_inplace_contig_dispatch_table; + using impl::bitwise_xor_inplace_strided_dispatch_table; + + auto bitwise_xor_inplace_pyapi = + [&](const arrayT &src, const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, bitwise_xor_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + bitwise_xor_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + bitwise_xor_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_bitwise_xor_inplace", bitwise_xor_inplace_pyapi, "", + py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_xor.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_xor.hpp new file mode 100644 index 0000000000..7b092aadda --- /dev/null +++ 
b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_xor.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_bitwise_xor(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/cbrt.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/cbrt.cpp new file mode 100644 index 0000000000..c526788496 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/cbrt.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "cbrt.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/cbrt.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U??: ==== CBRT (x) +namespace impl +{ + +namespace cbrt_fn_ns = dpctl::tensor::kernels::cbrt; + +static unary_contig_impl_fn_ptr_t cbrt_contig_dispatch_vector[td_ns::num_types]; +static int cbrt_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + cbrt_strided_dispatch_vector[td_ns::num_types]; + +void populate_cbrt_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = cbrt_fn_ns; + + using fn_ns::CbrtContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(cbrt_contig_dispatch_vector); + + using fn_ns::CbrtStridedFactory; + DispatchVectorBuilder + dvb2; + 
dvb2.populate_dispatch_vector(cbrt_strided_dispatch_vector); + + using fn_ns::CbrtTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(cbrt_output_typeid_vector); +}; + +} // namespace impl + +void init_cbrt(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_cbrt_dispatch_vectors(); + using impl::cbrt_contig_dispatch_vector; + using impl::cbrt_output_typeid_vector; + using impl::cbrt_strided_dispatch_vector; + + auto cbrt_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, cbrt_output_typeid_vector, + cbrt_contig_dispatch_vector, cbrt_strided_dispatch_vector); + }; + m.def("_cbrt", cbrt_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto cbrt_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, cbrt_output_typeid_vector); + }; + m.def("_cbrt_result_type", cbrt_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/cbrt.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/cbrt.hpp new file mode 100644 index 0000000000..74da1de81a --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/cbrt.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_cbrt(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/ceil.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/ceil.cpp new file mode 100644 index 0000000000..b4a0c90156 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/ceil.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "ceil.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/ceil.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U09: ==== CEIL (x) +namespace impl +{ + +namespace ceil_fn_ns = dpctl::tensor::kernels::ceil; + +static unary_contig_impl_fn_ptr_t ceil_contig_dispatch_vector[td_ns::num_types]; +static int ceil_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + ceil_strided_dispatch_vector[td_ns::num_types]; + +void populate_ceil_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = ceil_fn_ns; + + using fn_ns::CeilContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(ceil_contig_dispatch_vector); + + using fn_ns::CeilStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(ceil_strided_dispatch_vector); + + using fn_ns::CeilTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(ceil_output_typeid_vector); +}; + +} // namespace impl + +void init_ceil(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_ceil_dispatch_vectors(); + using impl::ceil_contig_dispatch_vector; + using 
impl::ceil_output_typeid_vector; + using impl::ceil_strided_dispatch_vector; + + auto ceil_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, ceil_output_typeid_vector, + ceil_contig_dispatch_vector, ceil_strided_dispatch_vector); + }; + m.def("_ceil", ceil_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto ceil_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, ceil_output_typeid_vector); + }; + m.def("_ceil_result_type", ceil_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/ceil.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/ceil.hpp new file mode 100644 index 0000000000..4a6caf999b --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/ceil.hpp @@ -0,0 +1,44 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_ceil(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/conj.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/conj.cpp new file mode 100644 index 0000000000..baec06fcb0 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/conj.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "conj.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/conj.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U10: ==== CONJ (x) +namespace impl +{ + +namespace conj_fn_ns = dpctl::tensor::kernels::conj; + +static unary_contig_impl_fn_ptr_t conj_contig_dispatch_vector[td_ns::num_types]; +static int conj_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + conj_strided_dispatch_vector[td_ns::num_types]; + +void populate_conj_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = conj_fn_ns; + + using fn_ns::ConjContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(conj_contig_dispatch_vector); + + using fn_ns::ConjStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(conj_strided_dispatch_vector); + + using fn_ns::ConjTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(conj_output_typeid_vector); +}; + +} // namespace impl + +void init_conj(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_conj_dispatch_vectors(); + using impl::conj_contig_dispatch_vector; + using impl::conj_output_typeid_vector; + using impl::conj_strided_dispatch_vector; + + auto conj_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, 
conj_output_typeid_vector, + conj_contig_dispatch_vector, conj_strided_dispatch_vector); + }; + m.def("_conj", conj_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto conj_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, conj_output_typeid_vector); + }; + m.def("_conj_result_type", conj_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/conj.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/conj.hpp new file mode 100644 index 0000000000..33d9993019 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/conj.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_conj(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/copysign.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/copysign.cpp new file mode 100644 index 0000000000..0983356d56 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/copysign.cpp @@ -0,0 +1,140 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "copysign.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/copysign.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B??: ===== COPYSIGN (x1, x2) +namespace impl +{ +namespace copysign_fn_ns = dpctl::tensor::kernels::copysign; + +static binary_contig_impl_fn_ptr_t + copysign_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int copysign_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + copysign_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_copysign_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = copysign_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::CopysignTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(copysign_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::CopysignStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(copysign_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::CopysignContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(copysign_contig_dispatch_table); +}; + +} // namespace impl + +void 
init_copysign(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_copysign_dispatch_tables(); + using impl::copysign_contig_dispatch_table; + using impl::copysign_output_id_table; + using impl::copysign_strided_dispatch_table; + + auto copysign_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, copysign_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + copysign_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + copysign_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto copysign_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + copysign_output_id_table); + }; + m.def("_copysign", copysign_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_copysign_result_type", copysign_result_type_pyapi, ""); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/copysign.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/copysign.hpp new file mode 100644 index 0000000000..d22cbdb0f0 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/copysign.hpp @@ -0,0 +1,42 @@ +//===----------- 
Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_copysign(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/cos.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/cos.cpp new file mode 100644 index 0000000000..86d36a35c5 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/cos.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "cos.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/cos.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U11: ==== COS (x) +namespace impl +{ + +namespace cos_fn_ns = dpctl::tensor::kernels::cos; + +static unary_contig_impl_fn_ptr_t cos_contig_dispatch_vector[td_ns::num_types]; +static int cos_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + cos_strided_dispatch_vector[td_ns::num_types]; + +void populate_cos_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = cos_fn_ns; + + using fn_ns::CosContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(cos_contig_dispatch_vector); + + using fn_ns::CosStridedFactory; + DispatchVectorBuilder + dvb2; + 
dvb2.populate_dispatch_vector(cos_strided_dispatch_vector); + + using fn_ns::CosTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(cos_output_typeid_vector); +}; + +} // namespace impl + +void init_cos(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_cos_dispatch_vectors(); + using impl::cos_contig_dispatch_vector; + using impl::cos_output_typeid_vector; + using impl::cos_strided_dispatch_vector; + + auto cos_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, cos_output_typeid_vector, + cos_contig_dispatch_vector, cos_strided_dispatch_vector); + }; + m.def("_cos", cos_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto cos_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, cos_output_typeid_vector); + }; + m.def("_cos_result_type", cos_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions4.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/cos.hpp similarity index 95% rename from dpctl/tensor/libtensor/source/elementwise_functions4.hpp rename to dpctl/tensor/libtensor/source/elementwise_functions/cos.hpp index 920221d644..1753058024 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions4.hpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/cos.hpp @@ -26,6 +26,8 @@ #pragma once #include +namespace py = pybind11; + namespace dpctl { namespace tensor @@ -33,7 +35,7 @@ namespace tensor namespace py_internal { -extern void init_elementwise_functions4(py::module_ m); +extern void init_cos(py::module_ m); } // namespace py_internal } // namespace tensor diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/cosh.cpp 
b/dpctl/tensor/libtensor/source/elementwise_functions/cosh.cpp new file mode 100644 index 0000000000..643415abcb --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/cosh.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "cosh.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/cosh.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U12: ==== COSH (x) +namespace impl +{ + +namespace cosh_fn_ns = dpctl::tensor::kernels::cosh; + +static unary_contig_impl_fn_ptr_t cosh_contig_dispatch_vector[td_ns::num_types]; +static int cosh_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + cosh_strided_dispatch_vector[td_ns::num_types]; + +void populate_cosh_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = cosh_fn_ns; + + using fn_ns::CoshContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(cosh_contig_dispatch_vector); + + using fn_ns::CoshStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(cosh_strided_dispatch_vector); + + using fn_ns::CoshTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(cosh_output_typeid_vector); +}; + +} // namespace impl + +void init_cosh(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_cosh_dispatch_vectors(); + using impl::cosh_contig_dispatch_vector; + using impl::cosh_output_typeid_vector; + using impl::cosh_strided_dispatch_vector; + + auto cosh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, 
cosh_output_typeid_vector, + cosh_contig_dispatch_vector, cosh_strided_dispatch_vector); + }; + m.def("_cosh", cosh_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto cosh_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, cosh_output_typeid_vector); + }; + m.def("_cosh_result_type", cosh_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/cosh.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/cosh.hpp new file mode 100644 index 0000000000..c1eba05ea5 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/cosh.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_cosh(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp new file mode 100644 index 0000000000..ecd3c15ae7 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp @@ -0,0 +1,248 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include + +#include "abs.hpp" +#include "acos.hpp" +#include "acosh.hpp" +#include "add.hpp" +#include "asin.hpp" +#include "asinh.hpp" +#include "atan.hpp" +#include "atan2.hpp" +#include "atanh.hpp" +#include "bitwise_and.hpp" +#include "bitwise_invert.hpp" +#include "bitwise_left_shift.hpp" +#include "bitwise_or.hpp" +#include "bitwise_right_shift.hpp" +#include "bitwise_xor.hpp" +#include "cbrt.hpp" +#include "ceil.hpp" +#include "conj.hpp" +#include "copysign.hpp" +#include "cos.hpp" +#include "cosh.hpp" +#include "equal.hpp" +#include "exp.hpp" +#include "exp2.hpp" +#include "expm1.hpp" +#include "floor.hpp" +#include "floor_divide.hpp" +#include "greater.hpp" +#include "greater_equal.hpp" +#include "hypot.hpp" +#include "imag.hpp" +#include "isfinite.hpp" +#include "isinf.hpp" +#include "isnan.hpp" +#include "less.hpp" +#include "less_equal.hpp" +#include "log.hpp" +#include "log10.hpp" +#include "log1p.hpp" +#include "log2.hpp" +#include "logaddexp.hpp" +#include "logical_and.hpp" +#include "logical_not.hpp" +#include "logical_or.hpp" +#include "logical_xor.hpp" +#include "maximum.hpp" +#include "minimum.hpp" +#include "multiply.hpp" +#include "negative.hpp" +#include "not_equal.hpp" +#include "positive.hpp" +#include "pow.hpp" +#include "proj.hpp" +#include "real.hpp" +#include "remainder.hpp" +#include "round.hpp" +#include "rsqrt.hpp" +#include "sign.hpp" +#include "signbit.hpp" +#include "sin.hpp" +#include "sinh.hpp" +#include "sqrt.hpp" +#include "square.hpp" +#include "subtract.hpp" +#include "tan.hpp" +#include "tanh.hpp" +#include "true_divide.hpp" +#include "trunc.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +void init_elementwise_functions(py::module_ m) +{ + using dpctl::tensor::py_internal::init_abs; + init_abs(m); + using dpctl::tensor::py_internal::init_acos; + init_acos(m); + using 
dpctl::tensor::py_internal::init_acosh; + init_acosh(m); + using dpctl::tensor::py_internal::init_add; + init_add(m); + using dpctl::tensor::py_internal::init_asin; + init_asin(m); + using dpctl::tensor::py_internal::init_asinh; + init_asinh(m); + using dpctl::tensor::py_internal::init_atan; + init_atan(m); + using dpctl::tensor::py_internal::init_atan2; + init_atan2(m); + using dpctl::tensor::py_internal::init_atanh; + init_atanh(m); + using dpctl::tensor::py_internal::init_bitwise_and; + init_bitwise_and(m); + using dpctl::tensor::py_internal::init_bitwise_invert; + init_bitwise_invert(m); + using dpctl::tensor::py_internal::init_bitwise_left_shift; + init_bitwise_left_shift(m); + using dpctl::tensor::py_internal::init_bitwise_or; + init_bitwise_or(m); + using dpctl::tensor::py_internal::init_bitwise_right_shift; + init_bitwise_right_shift(m); + using dpctl::tensor::py_internal::init_bitwise_xor; + init_bitwise_xor(m); + using dpctl::tensor::py_internal::init_cbrt; + init_cbrt(m); + using dpctl::tensor::py_internal::init_ceil; + init_ceil(m); + using dpctl::tensor::py_internal::init_conj; + init_conj(m); + using dpctl::tensor::py_internal::init_copysign; + init_copysign(m); + using dpctl::tensor::py_internal::init_cos; + init_cos(m); + using dpctl::tensor::py_internal::init_cosh; + init_cosh(m); + using dpctl::tensor::py_internal::init_equal; + init_equal(m); + using dpctl::tensor::py_internal::init_exp; + init_exp(m); + using dpctl::tensor::py_internal::init_exp2; + init_exp2(m); + using dpctl::tensor::py_internal::init_expm1; + init_expm1(m); + using dpctl::tensor::py_internal::init_floor; + init_floor(m); + using dpctl::tensor::py_internal::init_floor_divide; + init_floor_divide(m); + using dpctl::tensor::py_internal::init_greater; + init_greater(m); + using dpctl::tensor::py_internal::init_greater_equal; + init_greater_equal(m); + using dpctl::tensor::py_internal::init_hypot; + init_hypot(m); + using dpctl::tensor::py_internal::init_imag; + init_imag(m); + 
using dpctl::tensor::py_internal::init_isfinite; + init_isfinite(m); + using dpctl::tensor::py_internal::init_isinf; + init_isinf(m); + using dpctl::tensor::py_internal::init_isnan; + init_isnan(m); + using dpctl::tensor::py_internal::init_less; + init_less(m); + using dpctl::tensor::py_internal::init_less_equal; + init_less_equal(m); + using dpctl::tensor::py_internal::init_log; + init_log(m); + using dpctl::tensor::py_internal::init_log10; + init_log10(m); + using dpctl::tensor::py_internal::init_log1p; + init_log1p(m); + using dpctl::tensor::py_internal::init_log2; + init_log2(m); + using dpctl::tensor::py_internal::init_logaddexp; + init_logaddexp(m); + using dpctl::tensor::py_internal::init_logical_and; + init_logical_and(m); + using dpctl::tensor::py_internal::init_logical_not; + init_logical_not(m); + using dpctl::tensor::py_internal::init_logical_or; + init_logical_or(m); + using dpctl::tensor::py_internal::init_logical_xor; + init_logical_xor(m); + using dpctl::tensor::py_internal::init_maximum; + init_maximum(m); + using dpctl::tensor::py_internal::init_minimum; + init_minimum(m); + using dpctl::tensor::py_internal::init_multiply; + init_multiply(m); + using dpctl::tensor::py_internal::init_negative; + init_negative(m); + using dpctl::tensor::py_internal::init_not_equal; + init_not_equal(m); + using dpctl::tensor::py_internal::init_positive; + init_positive(m); + using dpctl::tensor::py_internal::init_pow; + init_pow(m); + using dpctl::tensor::py_internal::init_proj; + init_proj(m); + using dpctl::tensor::py_internal::init_real; + init_real(m); + using dpctl::tensor::py_internal::init_remainder; + init_remainder(m); + using dpctl::tensor::py_internal::init_round; + init_round(m); + using dpctl::tensor::py_internal::init_rsqrt; + init_rsqrt(m); + using dpctl::tensor::py_internal::init_sign; + init_sign(m); + using dpctl::tensor::py_internal::init_signbit; + init_signbit(m); + using dpctl::tensor::py_internal::init_sin; + init_sin(m); + using 
dpctl::tensor::py_internal::init_sinh; + init_sinh(m); + using dpctl::tensor::py_internal::init_sqrt; + init_sqrt(m); + using dpctl::tensor::py_internal::init_square; + init_square(m); + using dpctl::tensor::py_internal::init_subtract; + init_subtract(m); + using dpctl::tensor::py_internal::init_tan; + init_tan(m); + using dpctl::tensor::py_internal::init_tanh; + init_tanh(m); + using dpctl::tensor::py_internal::init_divide; + init_divide(m); + using dpctl::tensor::py_internal::init_trunc; + init_trunc(m); +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.hpp new file mode 100644 index 0000000000..899c3faf22 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_elementwise_functions(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp similarity index 99% rename from dpctl/tensor/libtensor/source/elementwise_functions.hpp rename to dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp index 198aad61e8..3ac74de3c8 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions.hpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp @@ -37,6 +37,7 @@ #include "utils/offset_utils.hpp" #include "utils/type_dispatch.hpp" +namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; static inline py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t) diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/equal.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/equal.cpp new file mode 100644 index 0000000000..7af3e15d5c --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/equal.cpp @@ -0,0 +1,140 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "equal.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/equal.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B09: ===== EQUAL (x1, x2) +namespace impl +{ +namespace equal_fn_ns = dpctl::tensor::kernels::equal; + +static binary_contig_impl_fn_ptr_t + equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int equal_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_equal_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = equal_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::EqualTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(equal_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::EqualStridedFactory; + DispatchTableBuilder + dtb2; + 
dtb2.populate_dispatch_table(equal_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::EqualContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(equal_contig_dispatch_table); +}; + +} // namespace impl + +void init_equal(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_equal_dispatch_tables(); + using impl::equal_contig_dispatch_table; + using impl::equal_output_id_table; + using impl::equal_strided_dispatch_table; + + auto equal_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, equal_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + equal_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + equal_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + equal_output_id_table); + }; + m.def("_equal", equal_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_equal_result_type", equal_result_type_pyapi, ""); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/equal.hpp 
b/dpctl/tensor/libtensor/source/elementwise_functions/equal.hpp new file mode 100644 index 0000000000..21ac4ad6b4 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/equal.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_equal(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/exp.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/exp.cpp new file mode 100644 index 0000000000..4f550268e1 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/exp.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "exp.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/exp.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U13: ==== EXP (x) +namespace impl +{ + +namespace exp_fn_ns = dpctl::tensor::kernels::exp; + +static unary_contig_impl_fn_ptr_t exp_contig_dispatch_vector[td_ns::num_types]; +static int exp_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + exp_strided_dispatch_vector[td_ns::num_types]; + +void populate_exp_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = exp_fn_ns; + + using fn_ns::ExpContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(exp_contig_dispatch_vector); + + using fn_ns::ExpStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(exp_strided_dispatch_vector); + + using fn_ns::ExpTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(exp_output_typeid_vector); +}; + +} // namespace impl + +void init_exp(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_exp_dispatch_vectors(); + using impl::exp_contig_dispatch_vector; + using impl::exp_output_typeid_vector; + using impl::exp_strided_dispatch_vector; + + auto exp_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, exp_output_typeid_vector, + 
exp_contig_dispatch_vector, exp_strided_dispatch_vector); + }; + m.def("_exp", exp_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto exp_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, exp_output_typeid_vector); + }; + m.def("_exp_result_type", exp_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions2.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/exp.hpp similarity index 95% rename from dpctl/tensor/libtensor/source/elementwise_functions2.hpp rename to dpctl/tensor/libtensor/source/elementwise_functions/exp.hpp index b712ddd136..7227f0a2dc 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions2.hpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/exp.hpp @@ -26,6 +26,8 @@ #pragma once #include +namespace py = pybind11; + namespace dpctl { namespace tensor @@ -33,7 +35,7 @@ namespace tensor namespace py_internal { -extern void init_elementwise_functions2(py::module_ m); +extern void init_exp(py::module_ m); } // namespace py_internal } // namespace tensor diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/exp2.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/exp2.cpp new file mode 100644 index 0000000000..8454ce5c77 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/exp2.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "exp2.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/exp2.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U??: ==== EXP2 (x) +namespace impl +{ + +namespace exp2_fn_ns = dpctl::tensor::kernels::exp2; + +static unary_contig_impl_fn_ptr_t exp2_contig_dispatch_vector[td_ns::num_types]; +static int exp2_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + exp2_strided_dispatch_vector[td_ns::num_types]; + +void populate_exp2_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = exp2_fn_ns; + + using fn_ns::Exp2ContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(exp2_contig_dispatch_vector); + + using fn_ns::Exp2StridedFactory; + DispatchVectorBuilder + dvb2; + 
dvb2.populate_dispatch_vector(exp2_strided_dispatch_vector); + + using fn_ns::Exp2TypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(exp2_output_typeid_vector); +}; + +} // namespace impl + +void init_exp2(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_exp2_dispatch_vectors(); + using impl::exp2_contig_dispatch_vector; + using impl::exp2_output_typeid_vector; + using impl::exp2_strided_dispatch_vector; + + auto exp2_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, exp2_output_typeid_vector, + exp2_contig_dispatch_vector, exp2_strided_dispatch_vector); + }; + m.def("_exp2", exp2_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto exp2_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, exp2_output_typeid_vector); + }; + m.def("_exp2_result_type", exp2_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/exp2.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/exp2.hpp new file mode 100644 index 0000000000..be041e1f8d --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/exp2.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_exp2(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/expm1.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/expm1.cpp new file mode 100644 index 0000000000..16a6f5fb62 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/expm1.cpp @@ -0,0 +1,121 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "expm1.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/expm1.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U14: ==== EXPM1 (x) +namespace impl +{ + +namespace expm1_fn_ns = dpctl::tensor::kernels::expm1; + +static unary_contig_impl_fn_ptr_t + expm1_contig_dispatch_vector[td_ns::num_types]; +static int expm1_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + expm1_strided_dispatch_vector[td_ns::num_types]; + +void populate_expm1_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = expm1_fn_ns; + + using fn_ns::Expm1ContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(expm1_contig_dispatch_vector); + + using fn_ns::Expm1StridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(expm1_strided_dispatch_vector); + + using fn_ns::Expm1TypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(expm1_output_typeid_vector); +}; + +} // namespace impl + +void init_expm1(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_expm1_dispatch_vectors(); + using impl::expm1_contig_dispatch_vector; + using 
impl::expm1_output_typeid_vector; + using impl::expm1_strided_dispatch_vector; + + auto expm1_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, expm1_output_typeid_vector, + expm1_contig_dispatch_vector, expm1_strided_dispatch_vector); + }; + m.def("_expm1", expm1_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto expm1_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + expm1_output_typeid_vector); + }; + m.def("_expm1_result_type", expm1_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/expm1.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/expm1.hpp new file mode 100644 index 0000000000..6e39644835 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/expm1.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_expm1(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/floor.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/floor.cpp new file mode 100644 index 0000000000..4279f32529 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/floor.cpp @@ -0,0 +1,121 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "floor.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/floor.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U15: ==== FLOOR (x) +namespace impl +{ + +namespace floor_fn_ns = dpctl::tensor::kernels::floor; + +static unary_contig_impl_fn_ptr_t + floor_contig_dispatch_vector[td_ns::num_types]; +static int floor_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + floor_strided_dispatch_vector[td_ns::num_types]; + +void populate_floor_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = floor_fn_ns; + + using fn_ns::FloorContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(floor_contig_dispatch_vector); + + using fn_ns::FloorStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(floor_strided_dispatch_vector); + + using fn_ns::FloorTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(floor_output_typeid_vector); +}; + +} // namespace impl + +void init_floor(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_floor_dispatch_vectors(); + using impl::floor_contig_dispatch_vector; + using impl::floor_output_typeid_vector; + using impl::floor_strided_dispatch_vector; + + auto floor_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, 
exec_q, depends, floor_output_typeid_vector, + floor_contig_dispatch_vector, floor_strided_dispatch_vector); + }; + m.def("_floor", floor_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto floor_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + floor_output_typeid_vector); + }; + m.def("_floor_result_type", floor_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/floor.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/floor.hpp new file mode 100644 index 0000000000..b742b058ad --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/floor.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_floor(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/floor_divide.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/floor_divide.cpp new file mode 100644 index 0000000000..b453d5bad0 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/floor_divide.cpp @@ -0,0 +1,190 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "floor_divide.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" +#include "kernels/elementwise_functions/floor_divide.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B10: ===== FLOOR_DIVIDE (x1, x2) +namespace impl +{ +namespace floor_divide_fn_ns = dpctl::tensor::kernels::floor_divide; + +static binary_contig_impl_fn_ptr_t + floor_divide_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int floor_divide_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + floor_divide_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + floor_divide_inplace_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + floor_divide_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_floor_divide_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = floor_divide_fn_ns; + + // which input types are supported, and what is the type of the result + using 
fn_ns::FloorDivideTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(floor_divide_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::FloorDivideStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(floor_divide_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::FloorDivideContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(floor_divide_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::FloorDivideInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table(floor_divide_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::FloorDivideInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(floor_divide_inplace_contig_dispatch_table); +}; + +} // namespace impl + +void init_floor_divide(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_floor_divide_dispatch_tables(); + using impl::floor_divide_contig_dispatch_table; + using impl::floor_divide_output_id_table; + using impl::floor_divide_strided_dispatch_table; + + auto floor_divide_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, floor_divide_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + floor_divide_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + floor_divide_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< 
+ binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto floor_divide_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + floor_divide_output_id_table); + }; + m.def("_floor_divide", floor_divide_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_floor_divide_result_type", floor_divide_result_type_pyapi, ""); + + using impl::floor_divide_inplace_contig_dispatch_table; + using impl::floor_divide_inplace_strided_dispatch_table; + + auto floor_divide_inplace_pyapi = + [&](const arrayT &src, const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, floor_divide_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + floor_divide_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + floor_divide_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_floor_divide_inplace", floor_divide_inplace_pyapi, "", + py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/floor_divide.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/floor_divide.hpp new file mode 100644 index 0000000000..c7f0d40dcc 
--- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/floor_divide.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_floor_divide(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/greater.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/greater.cpp new file mode 100644 index 0000000000..9aee4afe20 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/greater.cpp @@ -0,0 +1,140 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "greater.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/greater.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B11: ===== GREATER (x1, x2) +namespace impl +{ +namespace greater_fn_ns = dpctl::tensor::kernels::greater; + +static binary_contig_impl_fn_ptr_t + greater_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int greater_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + greater_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_greater_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = 
greater_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::GreaterTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(greater_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::GreaterStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(greater_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::GreaterContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(greater_contig_dispatch_table); +}; + +} // namespace impl + +void init_greater(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_greater_dispatch_tables(); + using impl::greater_contig_dispatch_table; + using impl::greater_output_id_table; + using impl::greater_strided_dispatch_table; + + auto greater_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, greater_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + greater_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + greater_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto greater_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + 
greater_output_id_table); + }; + m.def("_greater", greater_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_greater_result_type", greater_result_type_pyapi, ""); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/greater.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/greater.hpp new file mode 100644 index 0000000000..ba8dc57bb0 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/greater.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_greater(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/greater_equal.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/greater_equal.cpp new file mode 100644 index 0000000000..b1ec735c17 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/greater_equal.cpp @@ -0,0 +1,141 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "greater_equal.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/greater_equal.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B12: ===== GREATER_EQUAL (x1, x2) +namespace impl +{ +namespace greater_equal_fn_ns = dpctl::tensor::kernels::greater_equal; + +static binary_contig_impl_fn_ptr_t + greater_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int greater_equal_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + greater_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_greater_equal_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = greater_equal_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::GreaterEqualTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(greater_equal_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::GreaterEqualStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(greater_equal_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::GreaterEqualContigFactory; + DispatchTableBuilder + dtb3; + 
dtb3.populate_dispatch_table(greater_equal_contig_dispatch_table); +}; + +} // namespace impl + +void init_greater_equal(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_greater_equal_dispatch_tables(); + using impl::greater_equal_contig_dispatch_table; + using impl::greater_equal_output_id_table; + using impl::greater_equal_strided_dispatch_table; + + auto greater_equal_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, greater_equal_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + greater_equal_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + greater_equal_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto greater_equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + greater_equal_output_id_table); + }; + m.def("_greater_equal", greater_equal_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_greater_equal_result_type", greater_equal_result_type_pyapi, + ""); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/greater_equal.hpp 
b/dpctl/tensor/libtensor/source/elementwise_functions/greater_equal.hpp new file mode 100644 index 0000000000..2cf116566e --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/greater_equal.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_greater_equal(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/hypot.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/hypot.cpp new file mode 100644 index 0000000000..87c3c7a553 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/hypot.cpp @@ -0,0 +1,140 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "hypot.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/hypot.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B??: ===== HYPOT (x1, x2) +namespace impl +{ +namespace hypot_fn_ns = dpctl::tensor::kernels::hypot; + +static binary_contig_impl_fn_ptr_t + hypot_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int hypot_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + hypot_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_hypot_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = hypot_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::HypotTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(hypot_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::HypotStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(hypot_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::HypotContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(hypot_contig_dispatch_table); +}; + +} // namespace impl + +void init_hypot(py::module_ m) +{ + using arrayT = 
dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_hypot_dispatch_tables(); + using impl::hypot_contig_dispatch_table; + using impl::hypot_output_id_table; + using impl::hypot_strided_dispatch_table; + + auto hypot_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, hypot_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + hypot_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + hypot_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto hypot_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + hypot_output_id_table); + }; + m.def("_hypot", hypot_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_hypot_result_type", hypot_result_type_pyapi, ""); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/hypot.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/hypot.hpp new file mode 100644 index 0000000000..2d154917ea --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/hypot.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// 
Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_hypot(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/imag.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/imag.cpp new file mode 100644 index 0000000000..33c57ebea3 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/imag.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "imag.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/imag.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U16: ==== IMAG (x) +namespace impl +{ + +namespace imag_fn_ns = dpctl::tensor::kernels::imag; + +static unary_contig_impl_fn_ptr_t imag_contig_dispatch_vector[td_ns::num_types]; +static int imag_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + imag_strided_dispatch_vector[td_ns::num_types]; + +void populate_imag_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = imag_fn_ns; + + using fn_ns::ImagContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(imag_contig_dispatch_vector); + + using fn_ns::ImagStridedFactory; + DispatchVectorBuilder + dvb2; + 
dvb2.populate_dispatch_vector(imag_strided_dispatch_vector); + + using fn_ns::ImagTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(imag_output_typeid_vector); +}; + +} // namespace impl + +void init_imag(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_imag_dispatch_vectors(); + using impl::imag_contig_dispatch_vector; + using impl::imag_output_typeid_vector; + using impl::imag_strided_dispatch_vector; + + auto imag_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, imag_output_typeid_vector, + imag_contig_dispatch_vector, imag_strided_dispatch_vector); + }; + m.def("_imag", imag_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto imag_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, imag_output_typeid_vector); + }; + m.def("_imag_result_type", imag_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/imag.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/imag.hpp new file mode 100644 index 0000000000..ffac3f2465 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/imag.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_imag(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/isfinite.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/isfinite.cpp new file mode 100644 index 0000000000..000a4a3a15 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/isfinite.cpp @@ -0,0 +1,122 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "isfinite.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/isfinite.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U17: ==== ISFINITE (x) +namespace impl +{ + +namespace isfinite_fn_ns = dpctl::tensor::kernels::isfinite; + +static unary_contig_impl_fn_ptr_t + isfinite_contig_dispatch_vector[td_ns::num_types]; +static int isfinite_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + isfinite_strided_dispatch_vector[td_ns::num_types]; + +void populate_isfinite_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = isfinite_fn_ns; + + using fn_ns::IsFiniteContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(isfinite_contig_dispatch_vector); + + using fn_ns::IsFiniteStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(isfinite_strided_dispatch_vector); + + using fn_ns::IsFiniteTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(isfinite_output_typeid_vector); +}; + +} // namespace impl + +void init_isfinite(py::module_ m) +{ + using arrayT = 
dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_isfinite_dispatch_vectors(); + using impl::isfinite_contig_dispatch_vector; + using impl::isfinite_output_typeid_vector; + using impl::isfinite_strided_dispatch_vector; + + auto isfinite_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + isfinite_output_typeid_vector, + isfinite_contig_dispatch_vector, + isfinite_strided_dispatch_vector); + }; + m.def("_isfinite", isfinite_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto isfinite_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + isfinite_output_typeid_vector); + }; + m.def("_isfinite_result_type", isfinite_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/isfinite.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/isfinite.hpp new file mode 100644 index 0000000000..fd7508792b --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/isfinite.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_isfinite(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/isinf.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/isinf.cpp new file mode 100644 index 0000000000..7235c3cca4 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/isinf.cpp @@ -0,0 +1,121 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations.
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "isinf.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/isinf.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U18: ==== ISINF (x) +namespace impl +{ + +namespace isinf_fn_ns = dpctl::tensor::kernels::isinf; + +static unary_contig_impl_fn_ptr_t + isinf_contig_dispatch_vector[td_ns::num_types]; +static int isinf_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + isinf_strided_dispatch_vector[td_ns::num_types]; + +void populate_isinf_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = isinf_fn_ns; + + using fn_ns::IsInfContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(isinf_contig_dispatch_vector); + + using fn_ns::IsInfStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(isinf_strided_dispatch_vector); + + using fn_ns::IsInfTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(isinf_output_typeid_vector); +}; + +} // namespace impl + +void init_isinf(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_isinf_dispatch_vectors(); + using impl::isinf_contig_dispatch_vector; + using impl::isinf_output_typeid_vector; + using impl::isinf_strided_dispatch_vector; + + auto isinf_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, 
exec_q, depends, isinf_output_typeid_vector, + isinf_contig_dispatch_vector, isinf_strided_dispatch_vector); + }; + m.def("_isinf", isinf_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto isinf_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + isinf_output_typeid_vector); + }; + m.def("_isinf_result_type", isinf_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/isinf.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/isinf.hpp new file mode 100644 index 0000000000..8c3cd51c91 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/isinf.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_isinf(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/isnan.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/isnan.cpp new file mode 100644 index 0000000000..8849ff29fa --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/isnan.cpp @@ -0,0 +1,121 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "isnan.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/isnan.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U19: ==== ISNAN (x) +namespace impl +{ + +namespace isnan_fn_ns = dpctl::tensor::kernels::isnan; + +static unary_contig_impl_fn_ptr_t + isnan_contig_dispatch_vector[td_ns::num_types]; +static int isnan_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + isnan_strided_dispatch_vector[td_ns::num_types]; + +void populate_isnan_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = isnan_fn_ns; + + using fn_ns::IsNanContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(isnan_contig_dispatch_vector); + + using fn_ns::IsNanStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(isnan_strided_dispatch_vector); + + using fn_ns::IsNanTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(isnan_output_typeid_vector); +}; + +} // namespace impl + +void init_isnan(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_isnan_dispatch_vectors(); + using impl::isnan_contig_dispatch_vector; + using impl::isnan_output_typeid_vector; + using impl::isnan_strided_dispatch_vector; + + auto isnan_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, 
exec_q, depends, isnan_output_typeid_vector, + isnan_contig_dispatch_vector, isnan_strided_dispatch_vector); + }; + m.def("_isnan", isnan_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto isnan_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + isnan_output_typeid_vector); + }; + m.def("_isnan_result_type", isnan_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/isnan.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/isnan.hpp new file mode 100644 index 0000000000..df1f41d47f --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/isnan.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_isnan(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/less.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/less.cpp new file mode 100644 index 0000000000..48f30538f0 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/less.cpp @@ -0,0 +1,140 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "less.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/less.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B13: ===== LESS (x1, x2) +namespace impl +{ +namespace less_fn_ns = dpctl::tensor::kernels::less; + +static binary_contig_impl_fn_ptr_t less_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static int less_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + less_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_less_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = less_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LessTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(less_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LessStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(less_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LessContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(less_contig_dispatch_table); +}; + +} // namespace impl + +void init_less(py::module_ m) +{ + using arrayT = 
dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_less_dispatch_tables(); + using impl::less_contig_dispatch_table; + using impl::less_output_id_table; + using impl::less_strided_dispatch_table; + + auto less_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, less_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + less_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + less_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig row and + // c-contig matrix with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto less_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + less_output_id_table); + }; + m.def("_less", less_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_less_result_type", less_result_type_pyapi, ""); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/less.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/less.hpp new file mode 100644 index 0000000000..dada4b4be7 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/less.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023
Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_less(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/less_equal.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/less_equal.cpp new file mode 100644 index 0000000000..ce1a60d89f --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/less_equal.cpp @@ -0,0 +1,140 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "less_equal.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/less_equal.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B14: ===== LESS_EQUAL (x1, x2) +namespace impl +{ +namespace less_equal_fn_ns = dpctl::tensor::kernels::less_equal; + +static binary_contig_impl_fn_ptr_t + less_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int less_equal_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + less_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_less_equal_dispatch_tables(void) +{ + using namespace td_ns; + 
namespace fn_ns = less_equal_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LessEqualTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(less_equal_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LessEqualStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(less_equal_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LessEqualContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(less_equal_contig_dispatch_table); +}; + +} // namespace impl + +void init_less_equal(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_less_equal_dispatch_tables(); + using impl::less_equal_contig_dispatch_table; + using impl::less_equal_output_id_table; + using impl::less_equal_strided_dispatch_table; + + auto less_equal_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, less_equal_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + less_equal_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + less_equal_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig row and + // c-contig matrix with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto less_equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { +
return py_binary_ufunc_result_type(dtype1, dtype2, + less_equal_output_id_table); + }; + m.def("_less_equal", less_equal_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_less_equal_result_type", less_equal_result_type_pyapi, ""); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/less_equal.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/less_equal.hpp new file mode 100644 index 0000000000..e52ee3b940 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/less_equal.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_less_equal(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/log.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/log.cpp new file mode 100644 index 0000000000..2062cb8d59 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/log.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "log.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/log.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U20: ==== LOG (x) +namespace impl +{ + +namespace log_fn_ns = dpctl::tensor::kernels::log; + +static unary_contig_impl_fn_ptr_t log_contig_dispatch_vector[td_ns::num_types]; +static int log_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + log_strided_dispatch_vector[td_ns::num_types]; + +void populate_log_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = log_fn_ns; + + using fn_ns::LogContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(log_contig_dispatch_vector); + + using fn_ns::LogStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(log_strided_dispatch_vector); + + using fn_ns::LogTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(log_output_typeid_vector); +}; + +} // namespace impl + +void init_log(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_log_dispatch_vectors(); + using impl::log_contig_dispatch_vector; + using impl::log_output_typeid_vector; + using impl::log_strided_dispatch_vector; + + auto log_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, log_output_typeid_vector, + 
log_contig_dispatch_vector, log_strided_dispatch_vector); + }; + m.def("_log", log_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto log_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, log_output_typeid_vector); + }; + m.def("_log_result_type", log_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/log.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/log.hpp new file mode 100644 index 0000000000..1ca152d174 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/log.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_log(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/log10.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/log10.cpp new file mode 100644 index 0000000000..6e2cf79cc8 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/log10.cpp @@ -0,0 +1,121 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "log10.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/log10.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U23: ==== LOG10 (x) +namespace impl +{ + +namespace log10_fn_ns = dpctl::tensor::kernels::log10; + +static unary_contig_impl_fn_ptr_t + log10_contig_dispatch_vector[td_ns::num_types]; +static int log10_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + log10_strided_dispatch_vector[td_ns::num_types]; + +void populate_log10_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = log10_fn_ns; + + using fn_ns::Log10ContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(log10_contig_dispatch_vector); + + using fn_ns::Log10StridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(log10_strided_dispatch_vector); + + using fn_ns::Log10TypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(log10_output_typeid_vector); +}; + +} // namespace impl + +void init_log10(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_log10_dispatch_vectors(); + using impl::log10_contig_dispatch_vector; + using impl::log10_output_typeid_vector; + using impl::log10_strided_dispatch_vector; + + auto log10_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, 
exec_q, depends, log10_output_typeid_vector, + log10_contig_dispatch_vector, log10_strided_dispatch_vector); + }; + m.def("_log10", log10_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto log10_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + log10_output_typeid_vector); + }; + m.def("_log10_result_type", log10_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/log10.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/log10.hpp new file mode 100644 index 0000000000..3972695849 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/log10.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_log10(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/log1p.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/log1p.cpp new file mode 100644 index 0000000000..0219af01de --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/log1p.cpp @@ -0,0 +1,121 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "log1p.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/log1p.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U21: ==== LOG1P (x) +namespace impl +{ + +namespace log1p_fn_ns = dpctl::tensor::kernels::log1p; + +static unary_contig_impl_fn_ptr_t + log1p_contig_dispatch_vector[td_ns::num_types]; +static int log1p_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + log1p_strided_dispatch_vector[td_ns::num_types]; + +void populate_log1p_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = log1p_fn_ns; + + using fn_ns::Log1pContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(log1p_contig_dispatch_vector); + + using fn_ns::Log1pStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(log1p_strided_dispatch_vector); + + using fn_ns::Log1pTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(log1p_output_typeid_vector); +}; + +} // namespace impl + +void init_log1p(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_log1p_dispatch_vectors(); + using impl::log1p_contig_dispatch_vector; + using impl::log1p_output_typeid_vector; + using impl::log1p_strided_dispatch_vector; + + auto log1p_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, 
exec_q, depends, log1p_output_typeid_vector, + log1p_contig_dispatch_vector, log1p_strided_dispatch_vector); + }; + m.def("_log1p", log1p_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto log1p_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + log1p_output_typeid_vector); + }; + m.def("_log1p_result_type", log1p_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/log1p.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/log1p.hpp new file mode 100644 index 0000000000..438b93601c --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/log1p.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_log1p(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/log2.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/log2.cpp new file mode 100644 index 0000000000..6906977d73 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/log2.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "log2.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/log2.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U22: ==== LOG2 (x) +namespace impl +{ + +namespace log2_fn_ns = dpctl::tensor::kernels::log2; + +static unary_contig_impl_fn_ptr_t log2_contig_dispatch_vector[td_ns::num_types]; +static int log2_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + log2_strided_dispatch_vector[td_ns::num_types]; + +void populate_log2_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = log2_fn_ns; + + using fn_ns::Log2ContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(log2_contig_dispatch_vector); + + using fn_ns::Log2StridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(log2_strided_dispatch_vector); + + using fn_ns::Log2TypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(log2_output_typeid_vector); +}; + +} // namespace impl + +void init_log2(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_log2_dispatch_vectors(); + using impl::log2_contig_dispatch_vector; + using impl::log2_output_typeid_vector; + using impl::log2_strided_dispatch_vector; + + auto log2_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, 
log2_output_typeid_vector, + log2_contig_dispatch_vector, log2_strided_dispatch_vector); + }; + m.def("_log2", log2_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto log2_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, log2_output_typeid_vector); + }; + m.def("_log2_result_type", log2_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/log2.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/log2.hpp new file mode 100644 index 0000000000..4e47ed369a --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/log2.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_log2(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/logaddexp.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/logaddexp.cpp new file mode 100644 index 0000000000..21cfc9ce3b --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/logaddexp.cpp @@ -0,0 +1,140 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "logaddexp.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/logaddexp.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B15: ===== LOGADDEXP (x1, x2): dispatch tables and Python bindings +namespace impl +{ +namespace logaddexp_fn_ns = dpctl::tensor::kernels::logaddexp; + +static binary_contig_impl_fn_ptr_t + logaddexp_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int logaddexp_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + logaddexp_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_logaddexp_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = logaddexp_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LogAddExpTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(logaddexp_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LogAddExpStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(logaddexp_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LogAddExpContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(logaddexp_contig_dispatch_table); +}; + +} //
namespace impl + +void init_logaddexp(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_logaddexp_dispatch_tables(); + using impl::logaddexp_contig_dispatch_table; + using impl::logaddexp_output_id_table; + using impl::logaddexp_strided_dispatch_table; + + auto logaddexp_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, logaddexp_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + logaddexp_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + logaddexp_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig row and + // c-contig matrix with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto logaddexp_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + logaddexp_output_id_table); + }; + m.def("_logaddexp", logaddexp_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_logaddexp_result_type", logaddexp_result_type_pyapi, ""); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/logaddexp.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/logaddexp.hpp new file mode 100644 index 0000000000..6601b3f9c5 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/logaddexp.hpp @@
-0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_logaddexp(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/logical_and.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/logical_and.cpp new file mode 100644 index 0000000000..76a9b82f49 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/logical_and.cpp @@ -0,0 +1,140 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "logical_and.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/logical_and.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B16: ===== LOGICAL_AND (x1, x2): dispatch tables and Python bindings +namespace impl +{ +namespace logical_and_fn_ns = dpctl::tensor::kernels::logical_and; + +static binary_contig_impl_fn_ptr_t + logical_and_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int logical_and_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + logical_and_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_logical_and_dispatch_tables(void) +{ + using namespace
td_ns; + namespace fn_ns = logical_and_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LogicalAndTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(logical_and_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LogicalAndStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(logical_and_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LogicalAndContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(logical_and_contig_dispatch_table); +}; + +} // namespace impl + +void init_logical_and(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_logical_and_dispatch_tables(); + using impl::logical_and_contig_dispatch_table; + using impl::logical_and_output_id_table; + using impl::logical_and_strided_dispatch_table; + + auto logical_and_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, logical_and_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + logical_and_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + logical_and_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig row and + // c-contig matrix with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto logical_and_result_type_pyapi = [&](const py::dtype &dtype1, + const
py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + logical_and_output_id_table); + }; + m.def("_logical_and", logical_and_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_logical_and_result_type", logical_and_result_type_pyapi, ""); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/logical_and.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/logical_and.hpp new file mode 100644 index 0000000000..ee73f7c8d5 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/logical_and.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations.
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_logical_and(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/logical_not.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/logical_not.cpp new file mode 100644 index 0000000000..58fa5d45d7 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/logical_not.cpp @@ -0,0 +1,123 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "logical_not.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/logical_not.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U24: ==== LOGICAL_NOT (x): dispatch vectors and Python bindings +namespace impl +{ + +namespace logical_not_fn_ns = dpctl::tensor::kernels::logical_not; + +static unary_contig_impl_fn_ptr_t + logical_not_contig_dispatch_vector[td_ns::num_types]; +static int logical_not_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + logical_not_strided_dispatch_vector[td_ns::num_types]; + +void populate_logical_not_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = logical_not_fn_ns; + + using fn_ns::LogicalNotContigFactory; // contiguous-array function pointers + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(logical_not_contig_dispatch_vector); + + using fn_ns::LogicalNotStridedFactory; // strided-array function pointers + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(logical_not_strided_dispatch_vector); + + using fn_ns::LogicalNotTypeMapFactory; // output type id per input type + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(logical_not_output_typeid_vector); +}; + +} // namespace impl + +void init_logical_not(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_logical_not_dispatch_vectors(); + using impl::logical_not_contig_dispatch_vector; + using impl::logical_not_output_typeid_vector; + using impl::logical_not_strided_dispatch_vector; + + auto logical_not_pyapi = [&](const
arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + logical_not_output_typeid_vector, + logical_not_contig_dispatch_vector, + logical_not_strided_dispatch_vector); + }; + m.def("_logical_not", logical_not_pyapi, "", py::arg("src"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + + auto logical_not_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + logical_not_output_typeid_vector); + }; + m.def("_logical_not_result_type", logical_not_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/logical_not.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/logical_not.hpp new file mode 100644 index 0000000000..c1a2c393aa --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/logical_not.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations.
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_logical_not(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/logical_or.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/logical_or.cpp new file mode 100644 index 0000000000..a19ce43079 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/logical_or.cpp @@ -0,0 +1,140 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "logical_or.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/logical_or.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B17: ===== LOGICAL_OR (x1, x2): dispatch tables and Python bindings +namespace impl +{ +namespace logical_or_fn_ns = dpctl::tensor::kernels::logical_or; + +static binary_contig_impl_fn_ptr_t + logical_or_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int logical_or_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + logical_or_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_logical_or_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = logical_or_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LogicalOrTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(logical_or_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LogicalOrStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(logical_or_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LogicalOrContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(logical_or_contig_dispatch_table); +}; +
+} // namespace impl + +void init_logical_or(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_logical_or_dispatch_tables(); + using impl::logical_or_contig_dispatch_table; + using impl::logical_or_output_id_table; + using impl::logical_or_strided_dispatch_table; + + auto logical_or_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, logical_or_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + logical_or_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + logical_or_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig row and + // c-contig matrix with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto logical_or_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + logical_or_output_id_table); + }; + m.def("_logical_or", logical_or_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_logical_or_result_type", logical_or_result_type_pyapi, ""); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/logical_or.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/logical_or.hpp new file mode 100644 index 0000000000..00a4ddfcc2 --- /dev/null +++
b/dpctl/tensor/libtensor/source/elementwise_functions/logical_or.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_logical_or(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/logical_xor.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/logical_xor.cpp new file mode 100644 index 0000000000..0495b1721f --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/logical_xor.cpp @@ -0,0 +1,140 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "logical_xor.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/logical_xor.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B18: ===== LOGICAL_XOR (x1, x2): dispatch tables and Python bindings +namespace impl +{ +namespace logical_xor_fn_ns = dpctl::tensor::kernels::logical_xor; + +static binary_contig_impl_fn_ptr_t + logical_xor_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int logical_xor_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + logical_xor_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_logical_xor_dispatch_tables(void) +{ + using namespace
td_ns; + namespace fn_ns = logical_xor_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LogicalXorTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(logical_xor_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LogicalXorStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(logical_xor_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LogicalXorContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(logical_xor_contig_dispatch_table); +}; + +} // namespace impl + +void init_logical_xor(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_logical_xor_dispatch_tables(); + using impl::logical_xor_contig_dispatch_table; + using impl::logical_xor_output_id_table; + using impl::logical_xor_strided_dispatch_table; + + auto logical_xor_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, logical_xor_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + logical_xor_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + logical_xor_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig row and + // c-contig matrix with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto logical_xor_result_type_pyapi = [&](const py::dtype &dtype1, + const
py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + logical_xor_output_id_table); + }; + m.def("_logical_xor", logical_xor_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_logical_xor_result_type", logical_xor_result_type_pyapi, ""); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/logical_xor.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/logical_xor.hpp new file mode 100644 index 0000000000..ad069eb120 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/logical_xor.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations.
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_logical_xor(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/maximum.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/maximum.cpp new file mode 100644 index 0000000000..7bb818c74f --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/maximum.cpp @@ -0,0 +1,140 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations.
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "maximum.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/maximum.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B??: ===== MAXIMUM (x1, x2): dispatch tables and Python bindings +namespace impl +{ +namespace maximum_fn_ns = dpctl::tensor::kernels::maximum; + +static binary_contig_impl_fn_ptr_t + maximum_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int maximum_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + maximum_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_maximum_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = maximum_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::MaximumTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(maximum_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::MaximumStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(maximum_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::MaximumContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(maximum_contig_dispatch_table); +}; + +} // namespace impl + +void
init_maximum(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_maximum_dispatch_tables(); + using impl::maximum_contig_dispatch_table; + using impl::maximum_output_id_table; + using impl::maximum_strided_dispatch_table; + + auto maximum_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, maximum_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + maximum_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + maximum_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig row and + // c-contig matrix with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto maximum_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + maximum_output_id_table); + }; + m.def("_maximum", maximum_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_maximum_result_type", maximum_result_type_pyapi, ""); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/maximum.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/maximum.hpp new file mode 100644 index 0000000000..0f49850567 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/maximum.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of
_tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_maximum(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/minimum.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/minimum.cpp new file mode 100644 index 0000000000..10753b981a --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/minimum.cpp @@ -0,0 +1,140 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "minimum.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/minimum.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B??: ===== MINIMUM (x1, x2) +namespace impl +{ +namespace minimum_fn_ns = dpctl::tensor::kernels::minimum; + +static binary_contig_impl_fn_ptr_t + minimum_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int minimum_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + minimum_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_minimum_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = 
minimum_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::MinimumTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(minimum_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::MinimumStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(minimum_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::MinimumContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(minimum_contig_dispatch_table); +}; + +} // namespace impl + +void init_minimum(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_minimum_dispatch_tables(); + using impl::minimum_contig_dispatch_table; + using impl::minimum_output_id_table; + using impl::minimum_strided_dispatch_table; + + auto minimum_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, minimum_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + minimum_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + minimum_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto minimum_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + 
minimum_output_id_table); + }; + m.def("_minimum", minimum_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_minimum_result_type", minimum_result_type_pyapi, ""); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/minimum.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/minimum.hpp new file mode 100644 index 0000000000..f1f2467c1e --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/minimum.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_minimum(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/multiply.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/multiply.cpp new file mode 100644 index 0000000000..9e8cf9808a --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/multiply.cpp @@ -0,0 +1,230 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "multiply.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" +#include "kernels/elementwise_functions/multiply.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B19: ===== MULTIPLY (x1, x2) +namespace impl +{ + +namespace multiply_fn_ns = dpctl::tensor::kernels::multiply; + +static binary_contig_impl_fn_ptr_t + multiply_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int multiply_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + multiply_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +// mul(matrix, row) +static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t + multiply_contig_matrix_contig_row_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +// mul(row, matrix) +static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t + multiply_contig_row_contig_matrix_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + multiply_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static 
binary_inplace_strided_impl_fn_ptr_t + multiply_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t + multiply_inplace_row_matrix_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_multiply_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = multiply_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::MultiplyTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(multiply_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::MultiplyStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(multiply_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::MultiplyContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(multiply_contig_dispatch_table); + + // function pointers for operation on contiguous matrix, contiguous row + // with contiguous matrix output + using fn_ns::MultiplyContigMatrixContigRowBroadcastFactory; + DispatchTableBuilder< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, + MultiplyContigMatrixContigRowBroadcastFactory, num_types> + dtb4; + dtb4.populate_dispatch_table( + multiply_contig_matrix_contig_row_broadcast_dispatch_table); + + // function pointers for operation on contiguous row, contiguous matrix + // with contiguous matrix output + using fn_ns::MultiplyContigRowContigMatrixBroadcastFactory; + DispatchTableBuilder< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, + MultiplyContigRowContigMatrixBroadcastFactory, num_types> + dtb5; + dtb5.populate_dispatch_table( + multiply_contig_row_contig_matrix_broadcast_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::MultiplyInplaceStridedFactory; + DispatchTableBuilder + dtb6; + 
dtb6.populate_dispatch_table(multiply_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::MultiplyInplaceContigFactory; + DispatchTableBuilder + dtb7; + dtb7.populate_dispatch_table(multiply_inplace_contig_dispatch_table); + + // function pointers for inplace operation on contiguous matrix + // and contiguous row + using fn_ns::MultiplyInplaceRowMatrixBroadcastFactory; + DispatchTableBuilder + dtb8; + dtb8.populate_dispatch_table(multiply_inplace_row_matrix_dispatch_table); +}; + +} // namespace impl + +void init_multiply(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_multiply_dispatch_tables(); + using impl::multiply_contig_dispatch_table; + using impl::multiply_contig_matrix_contig_row_broadcast_dispatch_table; + using impl::multiply_contig_row_contig_matrix_broadcast_dispatch_table; + using impl::multiply_output_id_table; + using impl::multiply_strided_dispatch_table; + + auto multiply_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, multiply_output_id_table, + // function pointers to handle operation on contiguous + // arrays (pointers may be nullptr) + multiply_contig_dispatch_table, + // function pointers to handle operation on strided arrays + // (most general case) + multiply_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + multiply_contig_matrix_contig_row_broadcast_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + multiply_contig_row_contig_matrix_broadcast_dispatch_table); + }; + auto multiply_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return 
py_binary_ufunc_result_type(dtype1, dtype2, + multiply_output_id_table); + }; + m.def("_multiply", multiply_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_multiply_result_type", multiply_result_type_pyapi, ""); + + using impl::multiply_inplace_contig_dispatch_table; + using impl::multiply_inplace_row_matrix_dispatch_table; + using impl::multiply_inplace_strided_dispatch_table; + + auto multiply_inplace_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, multiply_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + multiply_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + multiply_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + multiply_inplace_row_matrix_dispatch_table); + }; + m.def("_multiply_inplace", multiply_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/multiply.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/multiply.hpp new file mode 100644 index 0000000000..e110ecbb20 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/multiply.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_multiply(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/negative.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/negative.cpp new file mode 100644 index 0000000000..0804c54bc3 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/negative.cpp @@ -0,0 +1,122 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "negative.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/negative.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U25: ==== NEGATIVE (x) +namespace impl +{ + +namespace negative_fn_ns = dpctl::tensor::kernels::negative; + +static unary_contig_impl_fn_ptr_t + negative_contig_dispatch_vector[td_ns::num_types]; +static int negative_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + negative_strided_dispatch_vector[td_ns::num_types]; + +void populate_negative_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = negative_fn_ns; + + using fn_ns::NegativeContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(negative_contig_dispatch_vector); + + using fn_ns::NegativeStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(negative_strided_dispatch_vector); + + using fn_ns::NegativeTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(negative_output_typeid_vector); +}; + +} // namespace impl + +void init_negative(py::module_ m) +{ + using arrayT = 
dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_negative_dispatch_vectors(); + using impl::negative_contig_dispatch_vector; + using impl::negative_output_typeid_vector; + using impl::negative_strided_dispatch_vector; + + auto negative_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + negative_output_typeid_vector, + negative_contig_dispatch_vector, + negative_strided_dispatch_vector); + }; + m.def("_negative", negative_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto negative_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + negative_output_typeid_vector); + }; + m.def("_negative_result_type", negative_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/negative.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/negative.hpp new file mode 100644 index 0000000000..048e481b34 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/negative.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_negative(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/not_equal.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/not_equal.cpp new file mode 100644 index 0000000000..77b6534eb2 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/not_equal.cpp @@ -0,0 +1,140 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "not_equal.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/not_equal.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B20: ===== NOT_EQUAL (x1, x2) +namespace impl +{ +namespace not_equal_fn_ns = dpctl::tensor::kernels::not_equal; + +static binary_contig_impl_fn_ptr_t + not_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int not_equal_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + not_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_not_equal_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = not_equal_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::NotEqualTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(not_equal_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::NotEqualStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(not_equal_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::NotEqualContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(not_equal_contig_dispatch_table); +}; + +} // namespace 
impl + +void init_not_equal(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_not_equal_dispatch_tables(); + using impl::not_equal_contig_dispatch_table; + using impl::not_equal_output_id_table; + using impl::not_equal_strided_dispatch_table; + + auto not_equal_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, not_equal_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + not_equal_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + not_equal_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto not_equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + not_equal_output_id_table); + }; + m.def("_not_equal", not_equal_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_not_equal_result_type", not_equal_result_type_pyapi, ""); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/not_equal.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/not_equal.hpp new file mode 100644 index 0000000000..4e1f654e79 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/not_equal.hpp @@ -0,0 +1,42 
@@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_not_equal(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/positive.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/positive.cpp new file mode 100644 index 0000000000..0f3ce3a385 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/positive.cpp @@ -0,0 +1,122 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "positive.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/positive.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U26: ==== POSITIVE (x) +namespace impl +{ + +namespace positive_fn_ns = dpctl::tensor::kernels::positive; + +static unary_contig_impl_fn_ptr_t + positive_contig_dispatch_vector[td_ns::num_types]; +static int positive_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + positive_strided_dispatch_vector[td_ns::num_types]; + +void populate_positive_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = positive_fn_ns; + + using fn_ns::PositiveContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(positive_contig_dispatch_vector); + + using fn_ns::PositiveStridedFactory; + 
DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(positive_strided_dispatch_vector); + + using fn_ns::PositiveTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(positive_output_typeid_vector); +}; + +} // namespace impl + +void init_positive(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_positive_dispatch_vectors(); + using impl::positive_contig_dispatch_vector; + using impl::positive_output_typeid_vector; + using impl::positive_strided_dispatch_vector; + + auto positive_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc(src, dst, exec_q, depends, + positive_output_typeid_vector, + positive_contig_dispatch_vector, + positive_strided_dispatch_vector); + }; + m.def("_positive", positive_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto positive_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + positive_output_typeid_vector); + }; + m.def("_positive_result_type", positive_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/positive.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/positive.hpp new file mode 100644 index 0000000000..a7b19a07ab --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/positive.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_positive(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/pow.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/pow.cpp new file mode 100644 index 0000000000..da25d861d7 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/pow.cpp @@ -0,0 +1,189 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "pow.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" +#include "kernels/elementwise_functions/pow.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B21: ===== POW (x1, x2) +namespace impl +{ + +namespace pow_fn_ns = dpctl::tensor::kernels::pow; + +static binary_contig_impl_fn_ptr_t pow_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static int pow_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + pow_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + pow_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + pow_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_pow_dispatch_tables(void) +{ + using namespace td_ns; + 
namespace fn_ns = pow_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::PowTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(pow_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::PowStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(pow_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::PowContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(pow_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::PowInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table(pow_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::PowInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(pow_inplace_contig_dispatch_table); +}; + +} // namespace impl + +void init_pow(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_pow_dispatch_tables(); + using impl::pow_contig_dispatch_table; + using impl::pow_output_id_table; + using impl::pow_strided_dispatch_table; + + auto pow_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, pow_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + pow_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + pow_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + 
binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig row and + // c-contig matrix with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto pow_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + pow_output_id_table); + }; + m.def("_pow", pow_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_pow_result_type", pow_result_type_pyapi, ""); + + using impl::pow_inplace_contig_dispatch_table; + using impl::pow_inplace_strided_dispatch_table; + + auto pow_inplace_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, pow_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + pow_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + pow_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_pow_inplace", pow_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/pow.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/pow.hpp new file mode 100644 index 0000000000..7a13b414eb --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/pow.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl
module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_pow(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/proj.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/proj.cpp new file mode 100644 index 0000000000..f973e2c532 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/proj.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "proj.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/proj.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U??: ==== PROJ (x) +namespace impl +{ + +namespace proj_fn_ns = dpctl::tensor::kernels::proj; + +static unary_contig_impl_fn_ptr_t proj_contig_dispatch_vector[td_ns::num_types]; +static int proj_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + proj_strided_dispatch_vector[td_ns::num_types]; + +void populate_proj_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = proj_fn_ns; + + using fn_ns::ProjContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(proj_contig_dispatch_vector); + + using fn_ns::ProjStridedFactory; + DispatchVectorBuilder + dvb2; + 
dvb2.populate_dispatch_vector(proj_strided_dispatch_vector); + + using fn_ns::ProjTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(proj_output_typeid_vector); +}; + +} // namespace impl + +void init_proj(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_proj_dispatch_vectors(); + using impl::proj_contig_dispatch_vector; + using impl::proj_output_typeid_vector; + using impl::proj_strided_dispatch_vector; + + auto proj_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, proj_output_typeid_vector, + proj_contig_dispatch_vector, proj_strided_dispatch_vector); + }; + m.def("_proj", proj_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto proj_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, proj_output_typeid_vector); + }; + m.def("_proj_result_type", proj_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/proj.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/proj.hpp new file mode 100644 index 0000000000..efbe751455 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/proj.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_proj(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/real.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/real.cpp new file mode 100644 index 0000000000..f14dd1b2eb --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/real.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "real.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/real.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U27: ==== REAL (x) +namespace impl +{ + +namespace real_fn_ns = dpctl::tensor::kernels::real; + +static unary_contig_impl_fn_ptr_t real_contig_dispatch_vector[td_ns::num_types]; +static int real_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + real_strided_dispatch_vector[td_ns::num_types]; + +void populate_real_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = real_fn_ns; + + using fn_ns::RealContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(real_contig_dispatch_vector); + + using fn_ns::RealStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(real_strided_dispatch_vector); + + using fn_ns::RealTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(real_output_typeid_vector); +}; + +} // namespace impl + +void init_real(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_real_dispatch_vectors(); + using impl::real_contig_dispatch_vector; + using 
impl::real_output_typeid_vector; + using impl::real_strided_dispatch_vector; + + auto real_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, real_output_typeid_vector, + real_contig_dispatch_vector, real_strided_dispatch_vector); + }; + m.def("_real", real_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto real_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, real_output_typeid_vector); + }; + m.def("_real_result_type", real_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/real.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/real.hpp new file mode 100644 index 0000000000..b380632448 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/real.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_real(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/remainder.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/remainder.cpp new file mode 100644 index 0000000000..468bb288c4 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/remainder.cpp @@ -0,0 +1,190 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "remainder.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" +#include "kernels/elementwise_functions/remainder.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B22: ===== REMAINDER (x1, x2) +namespace impl +{ + +namespace remainder_fn_ns = dpctl::tensor::kernels::remainder; + +static binary_contig_impl_fn_ptr_t + remainder_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int remainder_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + remainder_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + remainder_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + remainder_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_remainder_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = remainder_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::RemainderTypeMapFactory; + 
DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(remainder_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::RemainderStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(remainder_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::RemainderContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(remainder_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::RemainderInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table(remainder_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::RemainderInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(remainder_inplace_contig_dispatch_table); +} + +} // namespace impl + +void init_remainder(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_remainder_dispatch_tables(); + using impl::remainder_contig_dispatch_table; + using impl::remainder_output_id_table; + using impl::remainder_strided_dispatch_table; + + auto remainder_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, remainder_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + remainder_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + remainder_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to 
handle operation of c-contig row and + // c-contig matrix with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto remainder_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + remainder_output_id_table); + }; + m.def("_remainder", remainder_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_remainder_result_type", remainder_result_type_pyapi, ""); + + using impl::remainder_inplace_contig_dispatch_table; + using impl::remainder_inplace_strided_dispatch_table; + + auto remainder_inplace_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, remainder_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + remainder_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + remainder_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_remainder_inplace", remainder_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/remainder.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/remainder.hpp new file mode 100644 index 0000000000..ef538547a8 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/remainder.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of
_tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_remainder(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/round.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/round.cpp new file mode 100644 index 0000000000..324bd144f6 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/round.cpp @@ -0,0 +1,121 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "round.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/round.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U28: ==== ROUND (x) +namespace impl +{ + +namespace round_fn_ns = dpctl::tensor::kernels::round; + +static unary_contig_impl_fn_ptr_t + round_contig_dispatch_vector[td_ns::num_types]; +static int round_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + round_strided_dispatch_vector[td_ns::num_types]; + +void populate_round_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = round_fn_ns; + + using fn_ns::RoundContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(round_contig_dispatch_vector); + + using fn_ns::RoundStridedFactory; + DispatchVectorBuilder + dvb2; + 
dvb2.populate_dispatch_vector(round_strided_dispatch_vector); + + using fn_ns::RoundTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(round_output_typeid_vector); +}; + +} // namespace impl + +void init_round(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_round_dispatch_vectors(); + using impl::round_contig_dispatch_vector; + using impl::round_output_typeid_vector; + using impl::round_strided_dispatch_vector; + + auto round_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, round_output_typeid_vector, + round_contig_dispatch_vector, round_strided_dispatch_vector); + }; + m.def("_round", round_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto round_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + round_output_typeid_vector); + }; + m.def("_round_result_type", round_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/round.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/round.hpp new file mode 100644 index 0000000000..5753ef233b --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/round.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_round(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/rsqrt.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/rsqrt.cpp new file mode 100644 index 0000000000..e8282d5f4d --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/rsqrt.cpp @@ -0,0 +1,121 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "rsqrt.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/rsqrt.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U??: ==== RSQRT (x) +namespace impl +{ + +namespace rsqrt_fn_ns = dpctl::tensor::kernels::rsqrt; + +static unary_contig_impl_fn_ptr_t + rsqrt_contig_dispatch_vector[td_ns::num_types]; +static int rsqrt_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + rsqrt_strided_dispatch_vector[td_ns::num_types]; + +void populate_rsqrt_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = rsqrt_fn_ns; + + using fn_ns::RsqrtContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(rsqrt_contig_dispatch_vector); + + using fn_ns::RsqrtStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(rsqrt_strided_dispatch_vector); + + using fn_ns::RsqrtTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(rsqrt_output_typeid_vector); +}; + +} // namespace impl + +void init_rsqrt(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_rsqrt_dispatch_vectors(); + using impl::rsqrt_contig_dispatch_vector; + using 
impl::rsqrt_output_typeid_vector; + using impl::rsqrt_strided_dispatch_vector; + + auto rsqrt_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, rsqrt_output_typeid_vector, + rsqrt_contig_dispatch_vector, rsqrt_strided_dispatch_vector); + }; + m.def("_rsqrt", rsqrt_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto rsqrt_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + rsqrt_output_typeid_vector); + }; + m.def("_rsqrt_result_type", rsqrt_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/rsqrt.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/rsqrt.hpp new file mode 100644 index 0000000000..50efc16d79 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/rsqrt.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_rsqrt(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/sign.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/sign.cpp new file mode 100644 index 0000000000..85aee184fe --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/sign.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "sign.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/sign.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U29: ==== SIGN (x) +namespace impl +{ + +namespace sign_fn_ns = dpctl::tensor::kernels::sign; + +static unary_contig_impl_fn_ptr_t sign_contig_dispatch_vector[td_ns::num_types]; +static int sign_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + sign_strided_dispatch_vector[td_ns::num_types]; + +void populate_sign_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = sign_fn_ns; + + using fn_ns::SignContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(sign_contig_dispatch_vector); + + using fn_ns::SignStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(sign_strided_dispatch_vector); + + using fn_ns::SignTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(sign_output_typeid_vector); +}; + +} // namespace impl + +void init_sign(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_sign_dispatch_vectors(); + using impl::sign_contig_dispatch_vector; + using impl::sign_output_typeid_vector; + using impl::sign_strided_dispatch_vector; + + auto sign_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, 
sign_output_typeid_vector, + sign_contig_dispatch_vector, sign_strided_dispatch_vector); + }; + m.def("_sign", sign_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto sign_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, sign_output_typeid_vector); + }; + m.def("_sign_result_type", sign_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/sign.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/sign.hpp new file mode 100644 index 0000000000..fa01370842 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/sign.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_sign(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/signbit.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/signbit.cpp new file mode 100644 index 0000000000..f24ed907b5 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/signbit.cpp @@ -0,0 +1,122 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "signbit.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/signbit.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U??: ==== SIGNBIT (x) +namespace impl +{ + +namespace signbit_fn_ns = dpctl::tensor::kernels::signbit; + +static unary_contig_impl_fn_ptr_t + signbit_contig_dispatch_vector[td_ns::num_types]; +static int signbit_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + signbit_strided_dispatch_vector[td_ns::num_types]; + +void populate_signbit_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = signbit_fn_ns; + + using fn_ns::SignbitContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(signbit_contig_dispatch_vector); + + using fn_ns::SignbitStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(signbit_strided_dispatch_vector); + + using fn_ns::SignbitTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(signbit_output_typeid_vector); +}; + +} // namespace impl + +void init_signbit(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_signbit_dispatch_vectors(); + using impl::signbit_contig_dispatch_vector; + using impl::signbit_output_typeid_vector; + using impl::signbit_strided_dispatch_vector; + + auto signbit_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = 
{}) { + return py_unary_ufunc(src, dst, exec_q, depends, + signbit_output_typeid_vector, + signbit_contig_dispatch_vector, + signbit_strided_dispatch_vector); + }; + m.def("_signbit", signbit_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto signbit_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + signbit_output_typeid_vector); + }; + m.def("_signbit_result_type", signbit_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/signbit.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/signbit.hpp new file mode 100644 index 0000000000..85054bb4de --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/signbit.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_signbit(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/sin.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/sin.cpp new file mode 100644 index 0000000000..76993251d9 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/sin.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "sin.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/sin.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U30: ==== SIN (x) +namespace impl +{ + +namespace sin_fn_ns = dpctl::tensor::kernels::sin; + +static unary_contig_impl_fn_ptr_t sin_contig_dispatch_vector[td_ns::num_types]; +static int sin_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + sin_strided_dispatch_vector[td_ns::num_types]; + +void populate_sin_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = sin_fn_ns; + + using fn_ns::SinContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(sin_contig_dispatch_vector); + + using fn_ns::SinStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(sin_strided_dispatch_vector); + + using fn_ns::SinTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(sin_output_typeid_vector); +}; + +} // namespace impl + +void init_sin(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_sin_dispatch_vectors(); + using impl::sin_contig_dispatch_vector; + using impl::sin_output_typeid_vector; + using impl::sin_strided_dispatch_vector; + + auto sin_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, sin_output_typeid_vector, + 
sin_contig_dispatch_vector, sin_strided_dispatch_vector); + }; + m.def("_sin", sin_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto sin_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, sin_output_typeid_vector); + }; + m.def("_sin_result_type", sin_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/sin.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/sin.hpp new file mode 100644 index 0000000000..bd03604b16 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/sin.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_sin(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/sinh.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/sinh.cpp new file mode 100644 index 0000000000..9befdfd1a7 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/sinh.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "sinh.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/sinh.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U31: ==== SINH (x) +namespace impl +{ + +namespace sinh_fn_ns = dpctl::tensor::kernels::sinh; + +static unary_contig_impl_fn_ptr_t sinh_contig_dispatch_vector[td_ns::num_types]; +static int sinh_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + sinh_strided_dispatch_vector[td_ns::num_types]; + +void populate_sinh_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = sinh_fn_ns; + + using fn_ns::SinhContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(sinh_contig_dispatch_vector); + + using fn_ns::SinhStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(sinh_strided_dispatch_vector); + + using fn_ns::SinhTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(sinh_output_typeid_vector); +}; + +} // namespace impl + +void init_sinh(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_sinh_dispatch_vectors(); + using impl::sinh_contig_dispatch_vector; + using impl::sinh_output_typeid_vector; + using impl::sinh_strided_dispatch_vector; + + auto sinh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, 
sinh_output_typeid_vector, + sinh_contig_dispatch_vector, sinh_strided_dispatch_vector); + }; + m.def("_sinh", sinh_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto sinh_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, sinh_output_typeid_vector); + }; + m.def("_sinh_result_type", sinh_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/sinh.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/sinh.hpp new file mode 100644 index 0000000000..fef8ec416a --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/sinh.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_sinh(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/sqrt.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/sqrt.cpp new file mode 100644 index 0000000000..93c22603d5 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/sqrt.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "sqrt.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/sqrt.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U33: ==== SQRT (x) +namespace impl +{ + +namespace sqrt_fn_ns = dpctl::tensor::kernels::sqrt; + +// Dispatch state for sqrt; every array has td_ns::num_types entries: +// contiguous implementation pointers, output type ids, and strided +// implementation pointers. All three are filled by +// populate_sqrt_dispatch_vectors(). +static unary_contig_impl_fn_ptr_t sqrt_contig_dispatch_vector[td_ns::num_types]; +static int sqrt_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + sqrt_strided_dispatch_vector[td_ns::num_types]; + +// Fills the three sqrt vectors above using SqrtContigFactory, +// SqrtStridedFactory and SqrtTypeMapFactory, in that order. +// NOTE(review): the DispatchVectorBuilder declarations below, std::vector in +// init_sqrt and the bare #include lines show no <...> part in this text; +// presumably it was lost in extraction -- confirm against the upstream file. +void populate_sqrt_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = sqrt_fn_ns; + + using fn_ns::SqrtContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(sqrt_contig_dispatch_vector); + + using fn_ns::SqrtStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(sqrt_strided_dispatch_vector); + + using fn_ns::SqrtTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(sqrt_output_typeid_vector); +}; + +} // namespace impl + +// Adds the sqrt bindings to module m: populates the dispatch vectors, then +// defines "_sqrt" (src, dst, sycl_queue, depends; depends defaults to an +// empty list) and "_sqrt_result_type" (takes a dtype). +void init_sqrt(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_sqrt_dispatch_vectors(); + using impl::sqrt_contig_dispatch_vector; + using impl::sqrt_output_typeid_vector; + using impl::sqrt_strided_dispatch_vector; + + auto sqrt_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, 
sqrt_output_typeid_vector, + sqrt_contig_dispatch_vector, sqrt_strided_dispatch_vector); + }; + m.def("_sqrt", sqrt_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto sqrt_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, sqrt_output_typeid_vector); + }; + m.def("_sqrt_result_type", sqrt_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/sqrt.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/sqrt.hpp new file mode 100644 index 0000000000..38ea68635b --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/sqrt.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_sqrt(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/square.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/square.cpp new file mode 100644 index 0000000000..45576a659c --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/square.cpp @@ -0,0 +1,121 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "square.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/square.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U32: ==== SQUARE (x) +namespace impl +{ + +namespace square_fn_ns = dpctl::tensor::kernels::square; + +// Dispatch state for square; every array has td_ns::num_types entries: +// contiguous implementation pointers, output type ids, and strided +// implementation pointers. All three are filled by +// populate_square_dispatch_vectors(). +static unary_contig_impl_fn_ptr_t + square_contig_dispatch_vector[td_ns::num_types]; +static int square_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + square_strided_dispatch_vector[td_ns::num_types]; + +// Fills the three square vectors above using SquareContigFactory, +// SquareStridedFactory and SquareTypeMapFactory, in that order. +// NOTE(review): the DispatchVectorBuilder declarations below, std::vector in +// init_square and the bare #include lines show no <...> part in this text; +// presumably it was lost in extraction -- confirm against the upstream file. +void populate_square_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = square_fn_ns; + + using fn_ns::SquareContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(square_contig_dispatch_vector); + + using fn_ns::SquareStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(square_strided_dispatch_vector); + + using fn_ns::SquareTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(square_output_typeid_vector); +}; + +} // namespace impl + +// Adds the square bindings to module m: populates the dispatch vectors, then +// defines "_square" (src, dst, sycl_queue, depends; depends defaults to an +// empty list) and "_square_result_type" (takes a dtype). +void init_square(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_square_dispatch_vectors(); + using impl::square_contig_dispatch_vector; + using impl::square_output_typeid_vector; + using impl::square_strided_dispatch_vector; + + auto square_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return 
py_unary_ufunc( + src, dst, exec_q, depends, square_output_typeid_vector, + square_contig_dispatch_vector, square_strided_dispatch_vector); + }; + m.def("_square", square_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto square_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + square_output_typeid_vector); + }; + m.def("_square_result_type", square_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/square.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/square.hpp new file mode 100644 index 0000000000..d8268b728a --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/square.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_square(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/subtract.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/subtract.cpp new file mode 100644 index 0000000000..743647f019 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/subtract.cpp @@ -0,0 +1,229 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "subtract.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" +#include "kernels/elementwise_functions/subtract.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B23: ===== SUBTRACT (x1, x2) +namespace impl +{ +namespace subtract_fn_ns = dpctl::tensor::kernels::subtract; + +// Dispatch tables for subtract, each td_ns::num_types x td_ns::num_types. +// All of them are filled by populate_subtract_dispatch_tables(), which +// builds one table per factory imported from subtract_fn_ns. +static binary_contig_impl_fn_ptr_t + subtract_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int subtract_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + subtract_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +// sub(matrix, row) +static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t + subtract_contig_matrix_contig_row_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +// sub(row, matrix) +static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t + subtract_contig_row_contig_matrix_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +// in-place variants: contiguous, strided, and row/matrix broadcast +static binary_inplace_contig_impl_fn_ptr_t + subtract_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static 
binary_inplace_strided_impl_fn_ptr_t + subtract_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t + subtract_inplace_row_matrix_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_subtract_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = subtract_fn_ns; + + // which input types are supported, and what is the type of the result + // NOTE(review): most DispatchTableBuilder declarations in this function + // show no template argument list in this text (dtb4 and dtb5 do, as + // <fn_ptr_type, Factory, num_types>); presumably the rest were lost in + // extraction -- confirm against the upstream file. + using fn_ns::SubtractTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(subtract_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::SubtractStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(subtract_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::SubtractContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(subtract_contig_dispatch_table); + + // function pointers for operation on contiguous matrix, contiguous row + // with contiguous matrix output + using fn_ns::SubtractContigMatrixContigRowBroadcastFactory; + DispatchTableBuilder< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, + SubtractContigMatrixContigRowBroadcastFactory, num_types> + dtb4; + dtb4.populate_dispatch_table( + subtract_contig_matrix_contig_row_broadcast_dispatch_table); + + // function pointers for operation on contiguous row, contiguous matrix + // with contiguous matrix output + using fn_ns::SubtractContigRowContigMatrixBroadcastFactory; + DispatchTableBuilder< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, + SubtractContigRowContigMatrixBroadcastFactory, num_types> + dtb5; + dtb5.populate_dispatch_table( + subtract_contig_row_contig_matrix_broadcast_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::SubtractInplaceStridedFactory; + DispatchTableBuilder + dtb6; + 
dtb6.populate_dispatch_table(subtract_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::SubtractInplaceContigFactory; + DispatchTableBuilder + dtb7; + dtb7.populate_dispatch_table(subtract_inplace_contig_dispatch_table); + + // function pointers for inplace operation on contiguous matrix + // and contiguous row + using fn_ns::SubtractInplaceRowMatrixBroadcastFactory; + DispatchTableBuilder + dtb8; + dtb8.populate_dispatch_table(subtract_inplace_row_matrix_dispatch_table); +}; + +} // namespace impl + +// Adds the subtract bindings to module m: populates the dispatch tables, then +// defines "_subtract", "_subtract_result_type" and "_subtract_inplace". +void init_subtract(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_subtract_dispatch_tables(); + using impl::subtract_contig_dispatch_table; + using impl::subtract_contig_matrix_contig_row_broadcast_dispatch_table; + using impl::subtract_contig_row_contig_matrix_broadcast_dispatch_table; + using impl::subtract_output_id_table; + using impl::subtract_strided_dispatch_table; + + auto subtract_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, subtract_output_id_table, + // function pointers to handle operation on contiguous + // arrays (pointers may be nullptr) + subtract_contig_dispatch_table, + // function pointers to handle operation on strided arrays + // (most general case) + subtract_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + subtract_contig_matrix_contig_row_broadcast_dispatch_table, + // function pointers to handle operation of c-contig row + // and c-contig matrix with broadcasting (may be nullptr) + subtract_contig_row_contig_matrix_broadcast_dispatch_table); + }; + auto subtract_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return 
py_binary_ufunc_result_type(dtype1, dtype2, + subtract_output_id_table); + }; + m.def("_subtract", subtract_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_subtract_result_type", subtract_result_type_pyapi, ""); + + using impl::subtract_inplace_contig_dispatch_table; + using impl::subtract_inplace_row_matrix_dispatch_table; + using impl::subtract_inplace_strided_dispatch_table; + + auto subtract_inplace_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, subtract_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + subtract_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + subtract_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + subtract_inplace_row_matrix_dispatch_table); + }; + // NOTE(review): the keyword names bound here are "lhs"/"rhs" while the + // lambda parameters are src/dst -- confirm which array is updated in place. + m.def("_subtract_inplace", subtract_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/subtract.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/subtract.hpp new file mode 100644 index 0000000000..0a4d707865 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/subtract.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_subtract(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/tan.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/tan.cpp new file mode 100644 index 0000000000..c916d4ea9d --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/tan.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "tan.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/tan.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U33: ==== TAN (x) +namespace impl +{ + +namespace tan_fn_ns = dpctl::tensor::kernels::tan; + +// Dispatch state for tan; every array has td_ns::num_types entries: +// contiguous implementation pointers, output type ids, and strided +// implementation pointers. All three are filled by +// populate_tan_dispatch_vectors(). +static unary_contig_impl_fn_ptr_t tan_contig_dispatch_vector[td_ns::num_types]; +static int tan_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + tan_strided_dispatch_vector[td_ns::num_types]; + +// Fills the three tan vectors above using TanContigFactory, +// TanStridedFactory and TanTypeMapFactory, in that order. +// NOTE(review): the DispatchVectorBuilder declarations below, std::vector in +// init_tan and the bare #include lines show no <...> part in this text; +// presumably it was lost in extraction -- confirm against the upstream file. +void populate_tan_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = tan_fn_ns; + + using fn_ns::TanContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(tan_contig_dispatch_vector); + + using fn_ns::TanStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(tan_strided_dispatch_vector); + + using fn_ns::TanTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(tan_output_typeid_vector); +}; + +} // namespace impl + +// Adds the tan bindings to module m: populates the dispatch vectors, then +// defines "_tan" (src, dst, sycl_queue, depends; depends defaults to an +// empty list) and "_tan_result_type" (takes a dtype). +void init_tan(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_tan_dispatch_vectors(); + using impl::tan_contig_dispatch_vector; + using impl::tan_output_typeid_vector; + using 
impl::tan_strided_dispatch_vector; + + auto tan_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, tan_output_typeid_vector, + tan_contig_dispatch_vector, tan_strided_dispatch_vector); + }; + m.def("_tan", tan_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto tan_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, tan_output_typeid_vector); + }; + m.def("_tan_result_type", tan_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/tan.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/tan.hpp new file mode 100644 index 0000000000..f89c8b8f6d --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/tan.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +/// Registers the tan bindings ("_tan", "_tan_result_type") on module m; +/// defined in tan.cpp. +extern void init_tan(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/tanh.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/tanh.cpp new file mode 100644 index 0000000000..70112e2024 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/tanh.cpp @@ -0,0 +1,119 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "tanh.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/tanh.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U34: ==== TANH (x) +namespace impl +{ + +namespace tanh_fn_ns = dpctl::tensor::kernels::tanh; + +// Dispatch state for tanh; every array has td_ns::num_types entries: +// contiguous implementation pointers, output type ids, and strided +// implementation pointers. All three are filled by +// populate_tanh_dispatch_vectors(). +static unary_contig_impl_fn_ptr_t tanh_contig_dispatch_vector[td_ns::num_types]; +static int tanh_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + tanh_strided_dispatch_vector[td_ns::num_types]; + +// Fills the three tanh vectors above using TanhContigFactory, +// TanhStridedFactory and TanhTypeMapFactory, in that order. +// NOTE(review): the DispatchVectorBuilder declarations below, std::vector in +// init_tanh and the bare #include lines show no <...> part in this text; +// presumably it was lost in extraction -- confirm against the upstream file. +void populate_tanh_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = tanh_fn_ns; + + using fn_ns::TanhContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(tanh_contig_dispatch_vector); + + using fn_ns::TanhStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(tanh_strided_dispatch_vector); + + using fn_ns::TanhTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(tanh_output_typeid_vector); +}; + +} // namespace impl + +// Adds the tanh bindings to module m: populates the dispatch vectors, then +// defines "_tanh" (src, dst, sycl_queue, depends; depends defaults to an +// empty list) and "_tanh_result_type" (takes a dtype). +void init_tanh(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_tanh_dispatch_vectors(); + using impl::tanh_contig_dispatch_vector; + using impl::tanh_output_typeid_vector; + using impl::tanh_strided_dispatch_vector; + + auto tanh_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, exec_q, depends, 
tanh_output_typeid_vector, + tanh_contig_dispatch_vector, tanh_strided_dispatch_vector); + }; + m.def("_tanh", tanh_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto tanh_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, tanh_output_typeid_vector); + }; + m.def("_tanh_result_type", tanh_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/tanh.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/tanh.hpp new file mode 100644 index 0000000000..e456182971 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/tanh.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +/// Registers the tanh bindings ("_tanh", "_tanh_result_type") on module m; +/// defined in tanh.cpp. +extern void init_tanh(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/true_divide.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/true_divide.cpp new file mode 100644 index 0000000000..e638661511 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/true_divide.cpp @@ -0,0 +1,241 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "true_divide.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" +#include "kernels/elementwise_functions/true_divide.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B08: ===== DIVIDE (x1, x2) +namespace impl +{ +namespace true_divide_fn_ns = dpctl::tensor::kernels::true_divide; + +static binary_contig_impl_fn_ptr_t + true_divide_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int true_divide_output_id_table[td_ns::num_types][td_ns::num_types]; +static int true_divide_inplace_output_id_table[td_ns::num_types] + [td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + true_divide_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +// divide(matrix, row) +static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t + true_divide_contig_matrix_contig_row_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +// divide(row, matrix) +static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t + true_divide_contig_row_contig_matrix_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +static 
binary_inplace_contig_impl_fn_ptr_t + true_divide_inplace_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + true_divide_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t + true_divide_inplace_row_matrix_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_true_divide_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = true_divide_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::TrueDivideTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(true_divide_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::TrueDivideStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(true_divide_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::TrueDivideContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(true_divide_contig_dispatch_table); + + // function pointers for operation on contiguous matrix, contiguous row + // with contiguous matrix output + using fn_ns::TrueDivideContigMatrixContigRowBroadcastFactory; + DispatchTableBuilder< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, + TrueDivideContigMatrixContigRowBroadcastFactory, num_types> + dtb4; + dtb4.populate_dispatch_table( + true_divide_contig_matrix_contig_row_broadcast_dispatch_table); + + // function pointers for operation on contiguous row, contiguous matrix + // with contiguous matrix output + using fn_ns::TrueDivideContigRowContigMatrixBroadcastFactory; + DispatchTableBuilder< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, + TrueDivideContigRowContigMatrixBroadcastFactory, num_types> + dtb5; + dtb5.populate_dispatch_table( + true_divide_contig_row_contig_matrix_broadcast_dispatch_table); + + // which 
input types are supported, and what is the type of the result + using fn_ns::TrueDivideInplaceTypeMapFactory; + DispatchTableBuilder dtb6; + dtb6.populate_dispatch_table(true_divide_inplace_output_id_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::TrueDivideInplaceStridedFactory; + DispatchTableBuilder + dtb7; + dtb7.populate_dispatch_table(true_divide_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::TrueDivideInplaceContigFactory; + DispatchTableBuilder + dtb8; + dtb8.populate_dispatch_table(true_divide_inplace_contig_dispatch_table); + + // function pointers for inplace operation on contiguous matrix + // and contiguous row + using fn_ns::TrueDivideInplaceRowMatrixBroadcastFactory; + DispatchTableBuilder + dtb9; + dtb9.populate_dispatch_table(true_divide_inplace_row_matrix_dispatch_table); +}; + +} // namespace impl + +void init_divide(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_true_divide_dispatch_tables(); + using impl::true_divide_contig_dispatch_table; + using impl:: + true_divide_contig_matrix_contig_row_broadcast_dispatch_table; + using impl:: + true_divide_contig_row_contig_matrix_broadcast_dispatch_table; + using impl::true_divide_output_id_table; + using impl::true_divide_strided_dispatch_table; + + auto divide_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, true_divide_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + true_divide_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + true_divide_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig 
row with broadcasting (may be nullptr) + true_divide_contig_matrix_contig_row_broadcast_dispatch_table, + // function pointers to handle operation of c-contig row and + // c-contig matrix with broadcasting (may be nullptr) + true_divide_contig_row_contig_matrix_broadcast_dispatch_table); + }; + auto divide_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + true_divide_output_id_table); + }; + m.def("_divide", divide_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_divide_result_type", divide_result_type_pyapi, ""); + + using impl::true_divide_inplace_contig_dispatch_table; + using impl::true_divide_inplace_output_id_table; + using impl::true_divide_inplace_row_matrix_dispatch_table; + using impl::true_divide_inplace_strided_dispatch_table; + + auto divide_inplace_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, true_divide_inplace_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + true_divide_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + true_divide_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + true_divide_inplace_row_matrix_dispatch_table); + }; + m.def("_divide_inplace", divide_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/true_divide.hpp
b/dpctl/tensor/libtensor/source/elementwise_functions/true_divide.hpp new file mode 100644 index 0000000000..e29b858dae --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/true_divide.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_divide(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/trunc.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/trunc.cpp new file mode 100644 index 0000000000..60af6beafa --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/trunc.cpp @@ -0,0 +1,121 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#include "dpctl4pybind11.hpp" +#include +#include +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "trunc.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/trunc.hpp" + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::unary_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_strided_impl_fn_ptr_t; + +// U35: ==== TRUNC (x) +namespace impl +{ + +namespace trunc_fn_ns = dpctl::tensor::kernels::trunc; + +static unary_contig_impl_fn_ptr_t + trunc_contig_dispatch_vector[td_ns::num_types]; +static int trunc_output_typeid_vector[td_ns::num_types]; +static unary_strided_impl_fn_ptr_t + trunc_strided_dispatch_vector[td_ns::num_types]; + +void populate_trunc_dispatch_vectors(void) +{ + using namespace td_ns; + namespace fn_ns = trunc_fn_ns; + + using fn_ns::TruncContigFactory; + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(trunc_contig_dispatch_vector); + + using fn_ns::TruncStridedFactory; + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(trunc_strided_dispatch_vector); + + using fn_ns::TruncTypeMapFactory; + DispatchVectorBuilder dvb3; + dvb3.populate_dispatch_vector(trunc_output_typeid_vector); +}; + +} // namespace impl + +void init_trunc(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_trunc_dispatch_vectors(); + using impl::trunc_contig_dispatch_vector; + using impl::trunc_output_typeid_vector; + using impl::trunc_strided_dispatch_vector; + + auto trunc_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_unary_ufunc( + src, dst, 
exec_q, depends, trunc_output_typeid_vector, + trunc_contig_dispatch_vector, trunc_strided_dispatch_vector); + }; + m.def("_trunc", trunc_pyapi, "", py::arg("src"), py::arg("dst"), + py::arg("sycl_queue"), py::arg("depends") = py::list()); + + auto trunc_result_type_pyapi = [&](const py::dtype &dtype) { + return py_unary_ufunc_result_type(dtype, + trunc_output_typeid_vector); + }; + m.def("_trunc_result_type", trunc_result_type_pyapi); + } +} + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/trunc.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/trunc.hpp new file mode 100644 index 0000000000..cc28397f55 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/trunc.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. 
+//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_trunc(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions1.cpp b/dpctl/tensor/libtensor/source/elementwise_functions1.cpp deleted file mode 100644 index 305655230d..0000000000 --- a/dpctl/tensor/libtensor/source/elementwise_functions1.cpp +++ /dev/null @@ -1,1242 +0,0 @@ -//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// -// -// Data Parallel Control (dpctl) -// -// Copyright 2020-2023 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file defines functions of dpctl.tensor._tensor_impl extensions, -/// specifically functions for elementwise operations. 
-//===----------------------------------------------------------------------===// - -#include "dpctl4pybind11.hpp" -#include -#include -#include -#include -#include - -#include "elementwise_functions.hpp" -#include "elementwise_functions1.hpp" -#include "utils/type_dispatch.hpp" - -#include "kernels/elementwise_functions/less.hpp" -#include "kernels/elementwise_functions/less_equal.hpp" -#include "kernels/elementwise_functions/log.hpp" -#include "kernels/elementwise_functions/log10.hpp" -#include "kernels/elementwise_functions/log1p.hpp" -#include "kernels/elementwise_functions/log2.hpp" -#include "kernels/elementwise_functions/logaddexp.hpp" -#include "kernels/elementwise_functions/logical_and.hpp" -#include "kernels/elementwise_functions/logical_not.hpp" -#include "kernels/elementwise_functions/logical_or.hpp" -#include "kernels/elementwise_functions/logical_xor.hpp" -#include "kernels/elementwise_functions/maximum.hpp" -#include "kernels/elementwise_functions/minimum.hpp" -#include "kernels/elementwise_functions/multiply.hpp" -#include "kernels/elementwise_functions/negative.hpp" - -namespace dpctl -{ -namespace tensor -{ -namespace py_internal -{ - -namespace td_ns = dpctl::tensor::type_dispatch; - -namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; -using ew_cmn_ns::binary_contig_impl_fn_ptr_t; -using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; -using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; -using ew_cmn_ns::binary_strided_impl_fn_ptr_t; -using ew_cmn_ns::unary_contig_impl_fn_ptr_t; -using ew_cmn_ns::unary_strided_impl_fn_ptr_t; - -using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; -using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; -using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; - -// B13: ==== LESS (x1, x2) -namespace impl -{ -namespace less_fn_ns = dpctl::tensor::kernels::less; - -static binary_contig_impl_fn_ptr_t less_contig_dispatch_table[td_ns::num_types] - 
[td_ns::num_types]; -static int less_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - less_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_less_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = less_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::LessTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(less_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::LessStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(less_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::LessContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(less_contig_dispatch_table); -}; -} // namespace impl - -// B14: ==== LESS_EQUAL (x1, x2) -namespace impl -{ -namespace less_equal_fn_ns = dpctl::tensor::kernels::less_equal; - -static binary_contig_impl_fn_ptr_t - less_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int less_equal_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - less_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_less_equal_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = less_equal_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::LessEqualTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(less_equal_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::LessEqualStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(less_equal_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::LessEqualContigFactory; - DispatchTableBuilder - dtb3; - 
dtb3.populate_dispatch_table(less_equal_contig_dispatch_table); -}; -} // namespace impl - -// U20: ==== LOG (x) -namespace impl -{ - -namespace log_fn_ns = dpctl::tensor::kernels::log; - -static unary_contig_impl_fn_ptr_t log_contig_dispatch_vector[td_ns::num_types]; -static int log_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - log_strided_dispatch_vector[td_ns::num_types]; - -void populate_log_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = log_fn_ns; - - using fn_ns::LogContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(log_contig_dispatch_vector); - - using fn_ns::LogStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(log_strided_dispatch_vector); - - using fn_ns::LogTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(log_output_typeid_vector); -} - -} // namespace impl - -// U21: ==== LOG1P (x) -namespace impl -{ - -namespace log1p_fn_ns = dpctl::tensor::kernels::log1p; - -static unary_contig_impl_fn_ptr_t - log1p_contig_dispatch_vector[td_ns::num_types]; -static int log1p_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - log1p_strided_dispatch_vector[td_ns::num_types]; - -void populate_log1p_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = log1p_fn_ns; - - using fn_ns::Log1pContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(log1p_contig_dispatch_vector); - - using fn_ns::Log1pStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(log1p_strided_dispatch_vector); - - using fn_ns::Log1pTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(log1p_output_typeid_vector); -} - -} // namespace impl - -// U22: ==== LOG2 (x) -namespace impl -{ - -namespace log2_fn_ns = dpctl::tensor::kernels::log2; - -static unary_contig_impl_fn_ptr_t log2_contig_dispatch_vector[td_ns::num_types]; -static int 
log2_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - log2_strided_dispatch_vector[td_ns::num_types]; - -void populate_log2_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = log2_fn_ns; - - using fn_ns::Log2ContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(log2_contig_dispatch_vector); - - using fn_ns::Log2StridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(log2_strided_dispatch_vector); - - using fn_ns::Log2TypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(log2_output_typeid_vector); -}; - -} // namespace impl - -// U23: ==== LOG10 (x) -namespace impl -{ - -namespace log10_fn_ns = dpctl::tensor::kernels::log10; - -static unary_contig_impl_fn_ptr_t - log10_contig_dispatch_vector[td_ns::num_types]; -static int log10_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - log10_strided_dispatch_vector[td_ns::num_types]; - -void populate_log10_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = log10_fn_ns; - - using fn_ns::Log10ContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(log10_contig_dispatch_vector); - - using fn_ns::Log10StridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(log10_strided_dispatch_vector); - - using fn_ns::Log10TypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(log10_output_typeid_vector); -}; - -} // namespace impl - -// B15: ==== LOGADDEXP (x1, x2) -namespace impl -{ -namespace logaddexp_fn_ns = dpctl::tensor::kernels::logaddexp; - -static binary_contig_impl_fn_ptr_t - logaddexp_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int logaddexp_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - logaddexp_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_logaddexp_dispatch_tables(void) -{ - using namespace 
td_ns; - namespace fn_ns = logaddexp_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::LogAddExpTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(logaddexp_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::LogAddExpStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(logaddexp_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::LogAddExpContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(logaddexp_contig_dispatch_table); -}; -} // namespace impl - -// B16: ==== LOGICAL_AND (x1, x2) -namespace impl -{ -namespace logical_and_fn_ns = dpctl::tensor::kernels::logical_and; - -static binary_contig_impl_fn_ptr_t - logical_and_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int logical_and_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - logical_and_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_logical_and_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = logical_and_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::LogicalAndTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(logical_and_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::LogicalAndStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(logical_and_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::LogicalAndContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(logical_and_contig_dispatch_table); -}; -} // namespace impl - -// U24: ==== LOGICAL_NOT (x) -namespace impl -{ -namespace logical_not_fn_ns = dpctl::tensor::kernels::logical_not; - -static 
unary_contig_impl_fn_ptr_t - logical_not_contig_dispatch_vector[td_ns::num_types]; -static int logical_not_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - logical_not_strided_dispatch_vector[td_ns::num_types]; - -void populate_logical_not_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = logical_not_fn_ns; - - using fn_ns::LogicalNotContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(logical_not_contig_dispatch_vector); - - using fn_ns::LogicalNotStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(logical_not_strided_dispatch_vector); - - using fn_ns::LogicalNotTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(logical_not_output_typeid_vector); -}; -} // namespace impl - -// B17: ==== LOGICAL_OR (x1, x2) -namespace impl -{ -namespace logical_or_fn_ns = dpctl::tensor::kernels::logical_or; - -static binary_contig_impl_fn_ptr_t - logical_or_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int logical_or_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - logical_or_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_logical_or_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = logical_or_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::LogicalOrTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(logical_or_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::LogicalOrStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(logical_or_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::LogicalOrContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(logical_or_contig_dispatch_table); -}; -} // namespace impl - -// B18: 
==== LOGICAL_XOR (x1, x2) -namespace impl -{ -namespace logical_xor_fn_ns = dpctl::tensor::kernels::logical_xor; - -static binary_contig_impl_fn_ptr_t - logical_xor_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int logical_xor_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - logical_xor_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_logical_xor_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = logical_xor_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::LogicalXorTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(logical_xor_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::LogicalXorStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(logical_xor_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::LogicalXorContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(logical_xor_contig_dispatch_table); -}; -} // namespace impl - -// B??: ==== MAXIMUM (x1, x2) -namespace impl -{ - -namespace maximum_fn_ns = dpctl::tensor::kernels::maximum; - -static binary_contig_impl_fn_ptr_t - maximum_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int maximum_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - maximum_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_maximum_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = maximum_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::MaximumTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(maximum_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::MaximumStridedFactory; - 
DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(maximum_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::MaximumContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(maximum_contig_dispatch_table); -}; - -} // namespace impl - -// B??: ==== MINIMUM (x1, x2) -namespace impl -{ - -namespace minimum_fn_ns = dpctl::tensor::kernels::minimum; - -static binary_contig_impl_fn_ptr_t - minimum_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int minimum_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - minimum_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_minimum_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = minimum_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::MinimumTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(minimum_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::MinimumStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(minimum_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::MinimumContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(minimum_contig_dispatch_table); -}; - -} // namespace impl - -// B19: ==== MULTIPLY (x1, x2) -namespace impl -{ - -namespace multiply_fn_ns = dpctl::tensor::kernels::multiply; - -static binary_contig_impl_fn_ptr_t - multiply_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int multiply_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - multiply_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -// mul(matrix, row) -static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t - 
multiply_contig_matrix_contig_row_broadcast_dispatch_table - [td_ns::num_types][td_ns::num_types]; - -// mul(row, matrix) -static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t - multiply_contig_row_contig_matrix_broadcast_dispatch_table - [td_ns::num_types][td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - multiply_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - multiply_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; -static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t - multiply_inplace_row_matrix_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -void populate_multiply_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = multiply_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::MultiplyTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(multiply_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::MultiplyStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(multiply_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::MultiplyContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(multiply_contig_dispatch_table); - - // function pointers for operation on contiguous matrix, contiguous row - // with contiguous matrix output - using fn_ns::MultiplyContigMatrixContigRowBroadcastFactory; - DispatchTableBuilder< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, - MultiplyContigMatrixContigRowBroadcastFactory, num_types> - dtb4; - dtb4.populate_dispatch_table( - multiply_contig_matrix_contig_row_broadcast_dispatch_table); - - // function pointers for operation on contiguous row, contiguous matrix - // with contiguous matrix output - using fn_ns::MultiplyContigRowContigMatrixBroadcastFactory; - 
DispatchTableBuilder< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, - MultiplyContigRowContigMatrixBroadcastFactory, num_types> - dtb5; - dtb5.populate_dispatch_table( - multiply_contig_row_contig_matrix_broadcast_dispatch_table); - - // function pointers for inplace operation on general strided arrays - using fn_ns::MultiplyInplaceStridedFactory; - DispatchTableBuilder - dtb6; - dtb6.populate_dispatch_table(multiply_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::MultiplyInplaceContigFactory; - DispatchTableBuilder - dtb7; - dtb7.populate_dispatch_table(multiply_inplace_contig_dispatch_table); - - // function pointers for inplace operation on contiguous matrix - // and contiguous row - using fn_ns::MultiplyInplaceRowMatrixBroadcastFactory; - DispatchTableBuilder - dtb8; - dtb8.populate_dispatch_table(multiply_inplace_row_matrix_dispatch_table); -}; - -} // namespace impl - -// U25: ==== NEGATIVE (x) -namespace impl -{ - -namespace negative_fn_ns = dpctl::tensor::kernels::negative; - -static unary_contig_impl_fn_ptr_t - negative_contig_dispatch_vector[td_ns::num_types]; -static int negative_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - negative_strided_dispatch_vector[td_ns::num_types]; - -void populate_negative_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = negative_fn_ns; - - using fn_ns::NegativeContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(negative_contig_dispatch_vector); - - using fn_ns::NegativeStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(negative_strided_dispatch_vector); - - using fn_ns::NegativeTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(negative_output_typeid_vector); -} - -} // namespace impl - -// ========================================================================= // - -namespace py = pybind11; - 
-void init_elementwise_functions1(py::module_ m) -{ - - using arrayT = dpctl::tensor::usm_ndarray; - using event_vecT = std::vector; - - // B13: ==== LESS (x1, x2) - { - impl::populate_less_dispatch_tables(); - using impl::less_contig_dispatch_table; - using impl::less_output_id_table; - using impl::less_strided_dispatch_table; - - auto less_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, less_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - less_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - less_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto less_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - less_output_id_table); - }; - m.def("_less", less_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_less_result_type", less_result_type_pyapi, ""); - } - - // B14: ==== LESS_EQUAL (x1, x2) - { - impl::populate_less_equal_dispatch_tables(); - using impl::less_equal_contig_dispatch_table; - using impl::less_equal_output_id_table; - using impl::less_equal_strided_dispatch_table; - - auto less_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, 
- const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, less_equal_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - less_equal_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - less_equal_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto less_equal_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - less_equal_output_id_table); - }; - m.def("_less_equal", less_equal_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_less_equal_result_type", less_equal_result_type_pyapi, ""); - } - - // U20: ==== LOG (x) - { - impl::populate_log_dispatch_vectors(); - using impl::log_contig_dispatch_vector; - using impl::log_output_typeid_vector; - using impl::log_strided_dispatch_vector; - - auto log_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, log_output_typeid_vector, - log_contig_dispatch_vector, log_strided_dispatch_vector); - }; - m.def("_log", log_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto log_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, log_output_typeid_vector); - }; - 
m.def("_log_result_type", log_result_type_pyapi); - } - - // U21: ==== LOG1P (x) - { - impl::populate_log1p_dispatch_vectors(); - using impl::log1p_contig_dispatch_vector; - using impl::log1p_output_typeid_vector; - using impl::log1p_strided_dispatch_vector; - - auto log1p_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, log1p_output_typeid_vector, - log1p_contig_dispatch_vector, log1p_strided_dispatch_vector); - }; - m.def("_log1p", log1p_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto log1p_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - log1p_output_typeid_vector); - }; - m.def("_log1p_result_type", log1p_result_type_pyapi); - } - - // U22: ==== LOG2 (x) - { - impl::populate_log2_dispatch_vectors(); - - using impl::log2_contig_dispatch_vector; - using impl::log2_output_typeid_vector; - using impl::log2_strided_dispatch_vector; - auto log2_pyapi = [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, log2_output_typeid_vector, - log2_contig_dispatch_vector, log2_strided_dispatch_vector); - }; - auto log2_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, log2_output_typeid_vector); - }; - m.def("_log2", log2_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_log2_result_type", log2_result_type_pyapi, ""); - } - - // U23: ==== LOG10 (x) - { - impl::populate_log10_dispatch_vectors(); - - using impl::log10_contig_dispatch_vector; - using impl::log10_output_typeid_vector; - using impl::log10_strided_dispatch_vector; - auto log10_pyapi = [&](const dpctl::tensor::usm_ndarray &src, - const 
dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, log10_output_typeid_vector, - log10_contig_dispatch_vector, log10_strided_dispatch_vector); - }; - auto log10_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - log10_output_typeid_vector); - }; - m.def("_log10", log10_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_log10_result_type", log10_result_type_pyapi, ""); - } - - // B15: ==== LOGADDEXP (x1, x2) - { - impl::populate_logaddexp_dispatch_tables(); - using impl::logaddexp_contig_dispatch_table; - using impl::logaddexp_output_id_table; - using impl::logaddexp_strided_dispatch_table; - - auto logaddexp_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, logaddexp_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - logaddexp_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - logaddexp_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto logaddexp_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - logaddexp_output_id_table); - }; - m.def("_logaddexp", logaddexp_pyapi, "", 
py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_logaddexp_result_type", logaddexp_result_type_pyapi, ""); - } - - // B16: ==== LOGICAL_AND (x1, x2) - { - impl::populate_logical_and_dispatch_tables(); - using impl::logical_and_contig_dispatch_table; - using impl::logical_and_output_id_table; - using impl::logical_and_strided_dispatch_table; - - auto logical_and_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, logical_and_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - logical_and_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - logical_and_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto logical_and_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - logical_and_output_id_table); - }; - m.def("_logical_and", logical_and_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_logical_and_result_type", logical_and_result_type_pyapi, ""); - } - - // U24: ==== LOGICAL_NOT (x) - { - impl::populate_logical_not_dispatch_vectors(); - using impl::logical_not_contig_dispatch_vector; - using impl::logical_not_output_typeid_vector; 
- using impl::logical_not_strided_dispatch_vector; - - auto logical_not_pyapi = [&](const arrayT &src, arrayT dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, - logical_not_output_typeid_vector, - logical_not_contig_dispatch_vector, - logical_not_strided_dispatch_vector); - }; - m.def("_logical_not", logical_not_pyapi, "", py::arg("src"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - - auto logical_not_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - logical_not_output_typeid_vector); - }; - m.def("_logical_not_result_type", logical_not_result_type_pyapi); - } - - // B17: ==== LOGICAL_OR (x1, x2) - { - impl::populate_logical_or_dispatch_tables(); - using impl::logical_or_contig_dispatch_table; - using impl::logical_or_output_id_table; - using impl::logical_or_strided_dispatch_table; - - auto logical_or_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, logical_or_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - logical_or_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - logical_or_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto logical_or_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype 
&dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - logical_or_output_id_table); - }; - m.def("_logical_or", logical_or_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_logical_or_result_type", logical_or_result_type_pyapi, ""); - } - - // B18: ==== LOGICAL_XOR (x1, x2) - { - impl::populate_logical_xor_dispatch_tables(); - using impl::logical_xor_contig_dispatch_table; - using impl::logical_xor_output_id_table; - using impl::logical_xor_strided_dispatch_table; - - auto logical_xor_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, logical_xor_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - logical_xor_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - logical_xor_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto logical_xor_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - logical_xor_output_id_table); - }; - m.def("_logical_xor", logical_xor_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_logical_xor_result_type", logical_xor_result_type_pyapi, ""); - } - - // B??: ==== MAXIMUM (x1, x2) - { 
- impl::populate_maximum_dispatch_tables(); - using impl::maximum_contig_dispatch_table; - using impl::maximum_output_id_table; - using impl::maximum_strided_dispatch_table; - - auto maximum_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, maximum_output_id_table, - // function pointers to handle operation on contiguous - // arrays (pointers may be nullptr) - maximum_contig_dispatch_table, - // function pointers to handle operation on strided arrays - // (most general case) - maximum_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto maximum_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - maximum_output_id_table); - }; - m.def("_maximum", maximum_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_maximum_result_type", maximum_result_type_pyapi, ""); - } - - // B??: ==== MINIMUM (x1, x2) - { - impl::populate_minimum_dispatch_tables(); - using impl::minimum_contig_dispatch_table; - using impl::minimum_output_id_table; - using impl::minimum_strided_dispatch_table; - - auto minimum_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, 
dst, exec_q, depends, minimum_output_id_table, - // function pointers to handle operation on contiguous - // arrays (pointers may be nullptr) - minimum_contig_dispatch_table, - // function pointers to handle operation on strided arrays - // (most general case) - minimum_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto minimum_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - minimum_output_id_table); - }; - m.def("_minimum", minimum_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_minimum_result_type", minimum_result_type_pyapi, ""); - } - - // B19: ==== MULTIPLY (x1, x2) - { - impl::populate_multiply_dispatch_tables(); - using impl::multiply_contig_dispatch_table; - using impl::multiply_contig_matrix_contig_row_broadcast_dispatch_table; - using impl::multiply_contig_row_contig_matrix_broadcast_dispatch_table; - using impl::multiply_output_id_table; - using impl::multiply_strided_dispatch_table; - - auto multiply_pyapi = - [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, multiply_output_id_table, - // function pointers to handle operation on contiguous - // arrays (pointers may be nullptr) - multiply_contig_dispatch_table, - // function pointers to handle operation on strided arrays - // (most general case) - 
multiply_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) - multiply_contig_matrix_contig_row_broadcast_dispatch_table, - // function pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) - multiply_contig_row_contig_matrix_broadcast_dispatch_table); - }; - auto multiply_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - multiply_output_id_table); - }; - m.def("_multiply", multiply_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_multiply_result_type", multiply_result_type_pyapi, ""); - - using impl::multiply_inplace_contig_dispatch_table; - using impl::multiply_inplace_row_matrix_dispatch_table; - using impl::multiply_inplace_strided_dispatch_table; - - auto multiply_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, multiply_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - multiply_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - multiply_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - multiply_inplace_row_matrix_dispatch_table); - }; - m.def("_multiply_inplace", multiply_inplace_pyapi, "", py::arg("lhs"), - py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // U25: ==== NEGATIVE (x) - { - impl::populate_negative_dispatch_vectors(); - using impl::negative_contig_dispatch_vector; - using 
impl::negative_output_typeid_vector; - using impl::negative_strided_dispatch_vector; - - auto negative_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, - negative_output_typeid_vector, - negative_contig_dispatch_vector, - negative_strided_dispatch_vector); - }; - m.def("_negative", negative_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto negative_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - negative_output_typeid_vector); - }; - m.def("_negative_result_type", negative_result_type_pyapi); - } -} - -} // namespace py_internal -} // namespace tensor -} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions2.cpp b/dpctl/tensor/libtensor/source/elementwise_functions2.cpp deleted file mode 100644 index a274e5f150..0000000000 --- a/dpctl/tensor/libtensor/source/elementwise_functions2.cpp +++ /dev/null @@ -1,1454 +0,0 @@ -//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// -// -// Data Parallel Control (dpctl) -// -// Copyright 2020-2023 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file defines functions of dpctl.tensor._tensor_impl extensions, -/// specifically functions for elementwise operations. -//===----------------------------------------------------------------------===// - -#include "dpctl4pybind11.hpp" -#include -#include -#include -#include -#include - -#include "elementwise_functions.hpp" -#include "elementwise_functions2.hpp" -#include "utils/type_dispatch.hpp" - -#include "kernels/elementwise_functions/abs.hpp" -#include "kernels/elementwise_functions/acos.hpp" -#include "kernels/elementwise_functions/acosh.hpp" -#include "kernels/elementwise_functions/add.hpp" -#include "kernels/elementwise_functions/asin.hpp" -#include "kernels/elementwise_functions/asinh.hpp" -#include "kernels/elementwise_functions/atan.hpp" -#include "kernels/elementwise_functions/atan2.hpp" -#include "kernels/elementwise_functions/atanh.hpp" -#include "kernels/elementwise_functions/bitwise_and.hpp" -#include "kernels/elementwise_functions/bitwise_invert.hpp" -#include "kernels/elementwise_functions/bitwise_left_shift.hpp" -#include "kernels/elementwise_functions/bitwise_or.hpp" -#include "kernels/elementwise_functions/bitwise_right_shift.hpp" -#include "kernels/elementwise_functions/bitwise_xor.hpp" - -namespace dpctl -{ -namespace tensor -{ -namespace py_internal -{ - -namespace td_ns = dpctl::tensor::type_dispatch; - -namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; -using ew_cmn_ns::binary_contig_impl_fn_ptr_t; -using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; -using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; -using ew_cmn_ns::binary_strided_impl_fn_ptr_t; -using ew_cmn_ns::unary_contig_impl_fn_ptr_t; -using ew_cmn_ns::unary_strided_impl_fn_ptr_t; - -using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; -using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; -using 
ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; - -// U01: ==== ABS (x) -namespace impl -{ - -namespace abs_fn_ns = dpctl::tensor::kernels::abs; - -static unary_contig_impl_fn_ptr_t abs_contig_dispatch_vector[td_ns::num_types]; -static int abs_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - abs_strided_dispatch_vector[td_ns::num_types]; - -void populate_abs_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = abs_fn_ns; - - using fn_ns::AbsContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(abs_contig_dispatch_vector); - - using fn_ns::AbsStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(abs_strided_dispatch_vector); - - using fn_ns::AbsTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(abs_output_typeid_vector); -}; - -} // namespace impl - -// U02: ==== ACOS (x) -namespace impl -{ - -namespace acos_fn_ns = dpctl::tensor::kernels::acos; - -static unary_contig_impl_fn_ptr_t acos_contig_dispatch_vector[td_ns::num_types]; -static int acos_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - acos_strided_dispatch_vector[td_ns::num_types]; - -void populate_acos_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = acos_fn_ns; - - using fn_ns::AcosContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(acos_contig_dispatch_vector); - - using fn_ns::AcosStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(acos_strided_dispatch_vector); - - using fn_ns::AcosTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(acos_output_typeid_vector); -} - -} // namespace impl - -// U03: ===== ACOSH (x) -namespace impl -{ - -namespace acosh_fn_ns = dpctl::tensor::kernels::acosh; - -static unary_contig_impl_fn_ptr_t - acosh_contig_dispatch_vector[td_ns::num_types]; -static int acosh_output_typeid_vector[td_ns::num_types]; -static 
unary_strided_impl_fn_ptr_t - acosh_strided_dispatch_vector[td_ns::num_types]; - -void populate_acosh_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = acosh_fn_ns; - - using fn_ns::AcoshContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(acosh_contig_dispatch_vector); - - using fn_ns::AcoshStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(acosh_strided_dispatch_vector); - - using fn_ns::AcoshTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(acosh_output_typeid_vector); -} - -} // namespace impl - -// B01: ===== ADD (x1, x2) -namespace impl -{ -namespace add_fn_ns = dpctl::tensor::kernels::add; - -static binary_contig_impl_fn_ptr_t add_contig_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static int add_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - add_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -// add(matrix, row) -static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t - add_contig_matrix_contig_row_broadcast_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -// add(row, matrix) -static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t - add_contig_row_contig_matrix_broadcast_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - add_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - add_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; -static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t - add_inplace_row_matrix_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_add_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = add_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::AddTypeMapFactory; - DispatchTableBuilder dtb1; - 
dtb1.populate_dispatch_table(add_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::AddStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(add_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::AddContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(add_contig_dispatch_table); - - // function pointers for operation on contiguous matrix, contiguous row - // with contiguous matrix output - using fn_ns::AddContigMatrixContigRowBroadcastFactory; - DispatchTableBuilder< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, - AddContigMatrixContigRowBroadcastFactory, num_types> - dtb4; - dtb4.populate_dispatch_table( - add_contig_matrix_contig_row_broadcast_dispatch_table); - - // function pointers for operation on contiguous row, contiguous matrix - // with contiguous matrix output - using fn_ns::AddContigRowContigMatrixBroadcastFactory; - DispatchTableBuilder< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, - AddContigRowContigMatrixBroadcastFactory, num_types> - dtb5; - dtb5.populate_dispatch_table( - add_contig_row_contig_matrix_broadcast_dispatch_table); - - // function pointers for inplace operation on general strided arrays - using fn_ns::AddInplaceStridedFactory; - DispatchTableBuilder - dtb6; - dtb6.populate_dispatch_table(add_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::AddInplaceContigFactory; - DispatchTableBuilder - dtb7; - dtb7.populate_dispatch_table(add_inplace_contig_dispatch_table); - - // function pointers for inplace operation on contiguous matrix - // and contiguous row - using fn_ns::AddInplaceRowMatrixBroadcastFactory; - DispatchTableBuilder - dtb8; - dtb8.populate_dispatch_table(add_inplace_row_matrix_dispatch_table); -}; - -} // namespace impl - -// U04: ===== ASIN (x) -namespace impl -{ - 
-namespace asin_fn_ns = dpctl::tensor::kernels::asin; - -static unary_contig_impl_fn_ptr_t asin_contig_dispatch_vector[td_ns::num_types]; -static int asin_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - asin_strided_dispatch_vector[td_ns::num_types]; - -void populate_asin_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = asin_fn_ns; - - using fn_ns::AsinContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(asin_contig_dispatch_vector); - - using fn_ns::AsinStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(asin_strided_dispatch_vector); - - using fn_ns::AsinTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(asin_output_typeid_vector); -} - -} // namespace impl - -// U05: ===== ASINH (x) -namespace impl -{ - -namespace asinh_fn_ns = dpctl::tensor::kernels::asinh; - -static unary_contig_impl_fn_ptr_t - asinh_contig_dispatch_vector[td_ns::num_types]; -static int asinh_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - asinh_strided_dispatch_vector[td_ns::num_types]; - -void populate_asinh_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = asinh_fn_ns; - - using fn_ns::AsinhContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(asinh_contig_dispatch_vector); - - using fn_ns::AsinhStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(asinh_strided_dispatch_vector); - - using fn_ns::AsinhTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(asinh_output_typeid_vector); -} - -} // namespace impl - -// U06: ===== ATAN (x) -namespace impl -{ - -namespace atan_fn_ns = dpctl::tensor::kernels::atan; - -static unary_contig_impl_fn_ptr_t atan_contig_dispatch_vector[td_ns::num_types]; -static int atan_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - atan_strided_dispatch_vector[td_ns::num_types]; - 
-void populate_atan_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = atan_fn_ns; - - using fn_ns::AtanContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(atan_contig_dispatch_vector); - - using fn_ns::AtanStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(atan_strided_dispatch_vector); - - using fn_ns::AtanTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(atan_output_typeid_vector); -} - -} // namespace impl - -// B02: ===== ATAN2 (x1, x2) -namespace impl -{ -namespace atan2_fn_ns = dpctl::tensor::kernels::atan2; - -static binary_contig_impl_fn_ptr_t - atan2_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int atan2_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - atan2_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_atan2_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = atan2_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::Atan2TypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(atan2_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::Atan2StridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(atan2_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::Atan2ContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(atan2_contig_dispatch_table); -}; - -} // namespace impl - -// U07: ===== ATANH (x) -namespace impl -{ - -namespace atanh_fn_ns = dpctl::tensor::kernels::atanh; - -static unary_contig_impl_fn_ptr_t - atanh_contig_dispatch_vector[td_ns::num_types]; -static int atanh_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - atanh_strided_dispatch_vector[td_ns::num_types]; - -void 
populate_atanh_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = atanh_fn_ns; - - using fn_ns::AtanhContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(atanh_contig_dispatch_vector); - - using fn_ns::AtanhStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(atanh_strided_dispatch_vector); - - using fn_ns::AtanhTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(atanh_output_typeid_vector); -} - -} // namespace impl - -// B03: ===== BITWISE_AND (x1, x2) -namespace impl -{ -namespace bitwise_and_fn_ns = dpctl::tensor::kernels::bitwise_and; - -static binary_contig_impl_fn_ptr_t - bitwise_and_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int bitwise_and_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - bitwise_and_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - bitwise_and_inplace_contig_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - bitwise_and_inplace_strided_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -void populate_bitwise_and_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = bitwise_and_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::BitwiseAndTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(bitwise_and_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::BitwiseAndStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(bitwise_and_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::BitwiseAndContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(bitwise_and_contig_dispatch_table); - - // function pointers for inplace operation on 
general strided arrays - using fn_ns::BitwiseAndInplaceStridedFactory; - DispatchTableBuilder - dtb4; - dtb4.populate_dispatch_table(bitwise_and_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::BitwiseAndInplaceContigFactory; - DispatchTableBuilder - dtb5; - dtb5.populate_dispatch_table(bitwise_and_inplace_contig_dispatch_table); -}; - -} // namespace impl - -// B04: ===== BITWISE_LEFT_SHIFT (x1, x2) -namespace impl -{ -namespace bitwise_left_shift_fn_ns = dpctl::tensor::kernels::bitwise_left_shift; - -static binary_contig_impl_fn_ptr_t - bitwise_left_shift_contig_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static int bitwise_left_shift_output_id_table[td_ns::num_types] - [td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - bitwise_left_shift_strided_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - bitwise_left_shift_inplace_contig_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - bitwise_left_shift_inplace_strided_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -void populate_bitwise_left_shift_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = bitwise_left_shift_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::BitwiseLeftShiftTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(bitwise_left_shift_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::BitwiseLeftShiftStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(bitwise_left_shift_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::BitwiseLeftShiftContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(bitwise_left_shift_contig_dispatch_table); - - // function 
pointers for inplace operation on general strided arrays - using fn_ns::BitwiseLeftShiftInplaceStridedFactory; - DispatchTableBuilder - dtb4; - dtb4.populate_dispatch_table( - bitwise_left_shift_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::BitwiseLeftShiftInplaceContigFactory; - DispatchTableBuilder - dtb5; - dtb5.populate_dispatch_table( - bitwise_left_shift_inplace_contig_dispatch_table); -}; - -} // namespace impl - -// U08: ===== BITWISE_INVERT (x) -namespace impl -{ - -namespace bitwise_invert_fn_ns = dpctl::tensor::kernels::bitwise_invert; - -static unary_contig_impl_fn_ptr_t - bitwise_invert_contig_dispatch_vector[td_ns::num_types]; -static int bitwise_invert_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - bitwise_invert_strided_dispatch_vector[td_ns::num_types]; - -void populate_bitwise_invert_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = bitwise_invert_fn_ns; - - using fn_ns::BitwiseInvertContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(bitwise_invert_contig_dispatch_vector); - - using fn_ns::BitwiseInvertStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(bitwise_invert_strided_dispatch_vector); - - using fn_ns::BitwiseInvertTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(bitwise_invert_output_typeid_vector); -}; - -} // namespace impl - -// B05: ===== BITWISE_OR (x1, x2) -namespace impl -{ -namespace bitwise_or_fn_ns = dpctl::tensor::kernels::bitwise_or; - -static binary_contig_impl_fn_ptr_t - bitwise_or_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int bitwise_or_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - bitwise_or_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - 
bitwise_or_inplace_contig_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - bitwise_or_inplace_strided_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -void populate_bitwise_or_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = bitwise_or_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::BitwiseOrTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(bitwise_or_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::BitwiseOrStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(bitwise_or_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::BitwiseOrContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(bitwise_or_contig_dispatch_table); - - // function pointers for inplace operation on general strided arrays - using fn_ns::BitwiseOrInplaceStridedFactory; - DispatchTableBuilder - dtb4; - dtb4.populate_dispatch_table(bitwise_or_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::BitwiseOrInplaceContigFactory; - DispatchTableBuilder - dtb5; - dtb5.populate_dispatch_table(bitwise_or_inplace_contig_dispatch_table); -}; -} // namespace impl - -// B06: ===== BITWISE_RIGHT_SHIFT (x1, x2) -namespace impl -{ -namespace bitwise_right_shift_fn_ns = - dpctl::tensor::kernels::bitwise_right_shift; - -static binary_contig_impl_fn_ptr_t - bitwise_right_shift_contig_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static int bitwise_right_shift_output_id_table[td_ns::num_types] - [td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - bitwise_right_shift_strided_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - 
bitwise_right_shift_inplace_contig_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - bitwise_right_shift_inplace_strided_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -void populate_bitwise_right_shift_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = bitwise_right_shift_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::BitwiseRightShiftTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(bitwise_right_shift_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::BitwiseRightShiftStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(bitwise_right_shift_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::BitwiseRightShiftContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(bitwise_right_shift_contig_dispatch_table); - - // function pointers for inplace operation on general strided arrays - using fn_ns::BitwiseRightShiftInplaceStridedFactory; - DispatchTableBuilder - dtb4; - dtb4.populate_dispatch_table( - bitwise_right_shift_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::BitwiseRightShiftInplaceContigFactory; - DispatchTableBuilder - dtb5; - dtb5.populate_dispatch_table( - bitwise_right_shift_inplace_contig_dispatch_table); -}; - -} // namespace impl - -// B07: ===== BITWISE_XOR (x1, x2) -namespace impl -{ -namespace bitwise_xor_fn_ns = dpctl::tensor::kernels::bitwise_xor; - -static binary_contig_impl_fn_ptr_t - bitwise_xor_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int bitwise_xor_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - bitwise_xor_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -static 
binary_inplace_contig_impl_fn_ptr_t - bitwise_xor_inplace_contig_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - bitwise_xor_inplace_strided_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -void populate_bitwise_xor_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = bitwise_xor_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::BitwiseXorTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(bitwise_xor_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::BitwiseXorStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(bitwise_xor_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::BitwiseXorContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(bitwise_xor_contig_dispatch_table); - - // function pointers for inplace operation on general strided arrays - using fn_ns::BitwiseXorInplaceStridedFactory; - DispatchTableBuilder - dtb4; - dtb4.populate_dispatch_table(bitwise_xor_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::BitwiseXorInplaceContigFactory; - DispatchTableBuilder - dtb5; - dtb5.populate_dispatch_table(bitwise_xor_inplace_contig_dispatch_table); -}; -} // namespace impl - -// ========================================================================================== -// // - -namespace py = pybind11; - -void init_elementwise_functions2(py::module_ m) -{ - - using arrayT = dpctl::tensor::usm_ndarray; - using event_vecT = std::vector; - - // U01: ==== ABS (x) - { - impl::populate_abs_dispatch_vectors(); - using impl::abs_contig_dispatch_vector; - using impl::abs_output_typeid_vector; - using impl::abs_strided_dispatch_vector; - - auto abs_pyapi = [&](const arrayT &src, const arrayT &dst, - 
sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, abs_output_typeid_vector, - abs_contig_dispatch_vector, abs_strided_dispatch_vector); - }; - m.def("_abs", abs_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto abs_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, abs_output_typeid_vector); - }; - m.def("_abs_result_type", abs_result_type_pyapi); - } - - // U02: ==== ACOS (x) - { - impl::populate_acos_dispatch_vectors(); - using impl::acos_contig_dispatch_vector; - using impl::acos_output_typeid_vector; - using impl::acos_strided_dispatch_vector; - - auto acos_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, acos_output_typeid_vector, - acos_contig_dispatch_vector, acos_strided_dispatch_vector); - }; - m.def("_acos", acos_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto acos_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, acos_output_typeid_vector); - }; - m.def("_acos_result_type", acos_result_type_pyapi); - } - - // U03: ===== ACOSH (x) - { - impl::populate_acosh_dispatch_vectors(); - using impl::acosh_contig_dispatch_vector; - using impl::acosh_output_typeid_vector; - using impl::acosh_strided_dispatch_vector; - - auto acosh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, acosh_output_typeid_vector, - acosh_contig_dispatch_vector, acosh_strided_dispatch_vector); - }; - m.def("_acosh", acosh_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto acosh_result_type_pyapi = [&](const py::dtype &dtype) { - return 
py_unary_ufunc_result_type(dtype, - acosh_output_typeid_vector); - }; - m.def("_acosh_result_type", acosh_result_type_pyapi); - } - - // B01: ===== ADD (x1, x2) - { - impl::populate_add_dispatch_tables(); - using impl::add_contig_dispatch_table; - using impl::add_contig_matrix_contig_row_broadcast_dispatch_table; - using impl::add_contig_row_contig_matrix_broadcast_dispatch_table; - using impl::add_output_id_table; - using impl::add_strided_dispatch_table; - - auto add_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, add_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - add_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - add_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - add_contig_matrix_contig_row_broadcast_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - add_contig_row_contig_matrix_broadcast_dispatch_table); - }; - auto add_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - add_output_id_table); - }; - m.def("_add", add_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_add_result_type", add_result_type_pyapi, ""); - - using impl::add_inplace_contig_dispatch_table; - using impl::add_inplace_row_matrix_dispatch_table; - using impl::add_inplace_strided_dispatch_table; - - auto add_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue 
&exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, add_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - add_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - add_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - add_inplace_row_matrix_dispatch_table); - }; - m.def("_add_inplace", add_inplace_pyapi, "", py::arg("lhs"), - py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // U04: ===== ASIN (x) - { - impl::populate_asin_dispatch_vectors(); - using impl::asin_contig_dispatch_vector; - using impl::asin_output_typeid_vector; - using impl::asin_strided_dispatch_vector; - - auto asin_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, asin_output_typeid_vector, - asin_contig_dispatch_vector, asin_strided_dispatch_vector); - }; - m.def("_asin", asin_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto asin_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, asin_output_typeid_vector); - }; - m.def("_asin_result_type", asin_result_type_pyapi); - } - - // U05: ===== ASINH (x) - { - impl::populate_asinh_dispatch_vectors(); - using impl::asinh_contig_dispatch_vector; - using impl::asinh_output_typeid_vector; - using impl::asinh_strided_dispatch_vector; - - auto asinh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, asinh_output_typeid_vector, - asinh_contig_dispatch_vector, asinh_strided_dispatch_vector); - 
}; - m.def("_asinh", asinh_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto asinh_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - asinh_output_typeid_vector); - }; - m.def("_asinh_result_type", asinh_result_type_pyapi); - } - - // U06: ===== ATAN (x) - { - impl::populate_atan_dispatch_vectors(); - using impl::atan_contig_dispatch_vector; - using impl::atan_output_typeid_vector; - using impl::atan_strided_dispatch_vector; - - auto atan_pyapi = [&](arrayT src, arrayT dst, sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, atan_output_typeid_vector, - atan_contig_dispatch_vector, atan_strided_dispatch_vector); - }; - m.def("_atan", atan_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto atan_result_type_pyapi = [&](py::dtype dtype) { - return py_unary_ufunc_result_type(dtype, atan_output_typeid_vector); - }; - m.def("_atan_result_type", atan_result_type_pyapi); - } - - // B02: ===== ATAN2 (x1, x2) - { - impl::populate_atan2_dispatch_tables(); - using impl::atan2_contig_dispatch_table; - using impl::atan2_output_id_table; - using impl::atan2_strided_dispatch_table; - - auto atan2_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, atan2_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - atan2_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - atan2_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - 
binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto atan2_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - atan2_output_id_table); - }; - m.def("_atan2", atan2_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_atan2_result_type", atan2_result_type_pyapi, ""); - } - - // U07: ===== ATANH (x) - { - impl::populate_atanh_dispatch_vectors(); - using impl::atanh_contig_dispatch_vector; - using impl::atanh_output_typeid_vector; - using impl::atanh_strided_dispatch_vector; - - auto atanh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, atanh_output_typeid_vector, - atanh_contig_dispatch_vector, atanh_strided_dispatch_vector); - }; - m.def("_atanh", atanh_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto atanh_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - atanh_output_typeid_vector); - }; - m.def("_atanh_result_type", atanh_result_type_pyapi); - } - - // B03: ===== BITWISE_AND (x1, x2) - { - impl::populate_bitwise_and_dispatch_tables(); - using impl::bitwise_and_contig_dispatch_table; - using impl::bitwise_and_output_id_table; - using impl::bitwise_and_strided_dispatch_table; - - auto bitwise_and_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, 
bitwise_and_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - bitwise_and_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - bitwise_and_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto bitwise_and_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - bitwise_and_output_id_table); - }; - m.def("_bitwise_and", bitwise_and_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_bitwise_and_result_type", bitwise_and_result_type_pyapi, ""); - - using impl::bitwise_and_inplace_contig_dispatch_table; - using impl::bitwise_and_inplace_strided_dispatch_table; - - auto bitwise_and_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, bitwise_and_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - bitwise_and_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - bitwise_and_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - 
binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_bitwise_and_inplace", bitwise_and_inplace_pyapi, "", - py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // B04: ===== BITWISE_LEFT_SHIFT (x1, x2) - { - impl::populate_bitwise_left_shift_dispatch_tables(); - using impl::bitwise_left_shift_contig_dispatch_table; - using impl::bitwise_left_shift_output_id_table; - using impl::bitwise_left_shift_strided_dispatch_table; - - auto bitwise_left_shift_pyapi = [&](const dpctl::tensor::usm_ndarray - &src1, - const dpctl::tensor::usm_ndarray - &src2, - const dpctl::tensor::usm_ndarray - &dst, - sycl::queue &exec_q, - const std::vector - &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, - bitwise_left_shift_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - bitwise_left_shift_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - bitwise_left_shift_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto bitwise_left_shift_result_type_pyapi = - [&](const py::dtype &dtype1, const py::dtype &dtype2) { - return py_binary_ufunc_result_type( - dtype1, dtype2, bitwise_left_shift_output_id_table); - }; - m.def("_bitwise_left_shift", bitwise_left_shift_pyapi, "", - py::arg("src1"), py::arg("src2"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_bitwise_left_shift_result_type", - bitwise_left_shift_result_type_pyapi, ""); - - using 
impl::bitwise_left_shift_inplace_contig_dispatch_table; - using impl::bitwise_left_shift_inplace_strided_dispatch_table; - - auto bitwise_left_shift_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, - bitwise_left_shift_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - bitwise_left_shift_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - bitwise_left_shift_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_bitwise_left_shift_inplace", bitwise_left_shift_inplace_pyapi, - "", py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // U08: ===== BITWISE_INVERT (x) - { - impl::populate_bitwise_invert_dispatch_vectors(); - using impl::bitwise_invert_contig_dispatch_vector; - using impl::bitwise_invert_output_typeid_vector; - using impl::bitwise_invert_strided_dispatch_vector; - - auto bitwise_invert_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, - bitwise_invert_output_typeid_vector, - bitwise_invert_contig_dispatch_vector, - bitwise_invert_strided_dispatch_vector); - }; - m.def("_bitwise_invert", bitwise_invert_pyapi, "", py::arg("src"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - - auto bitwise_invert_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type( - dtype, bitwise_invert_output_typeid_vector); - }; - 
m.def("_bitwise_invert_result_type", bitwise_invert_result_type_pyapi); - } - - // B05: ===== BITWISE_OR (x1, x2) - { - impl::populate_bitwise_or_dispatch_tables(); - using impl::bitwise_or_contig_dispatch_table; - using impl::bitwise_or_output_id_table; - using impl::bitwise_or_strided_dispatch_table; - - auto bitwise_or_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, bitwise_or_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - bitwise_or_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - bitwise_or_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto bitwise_or_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - bitwise_or_output_id_table); - }; - m.def("_bitwise_or", bitwise_or_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_bitwise_or_result_type", bitwise_or_result_type_pyapi, ""); - - using impl::bitwise_or_inplace_contig_dispatch_table; - using impl::bitwise_or_inplace_strided_dispatch_table; - - auto bitwise_or_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return 
py_binary_inplace_ufunc( - src, dst, exec_q, depends, bitwise_or_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - bitwise_or_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - bitwise_or_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_bitwise_or_inplace", bitwise_or_inplace_pyapi, "", - py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // B06: ===== BITWISE_RIGHT_SHIFT (x1, x2) - { - impl::populate_bitwise_right_shift_dispatch_tables(); - using impl::bitwise_right_shift_contig_dispatch_table; - using impl::bitwise_right_shift_output_id_table; - using impl::bitwise_right_shift_strided_dispatch_table; - - auto bitwise_right_shift_pyapi = [&](const dpctl::tensor::usm_ndarray - &src1, - const dpctl::tensor::usm_ndarray - &src2, - const dpctl::tensor::usm_ndarray - &dst, - sycl::queue &exec_q, - const std::vector - &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, - bitwise_right_shift_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - bitwise_right_shift_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - bitwise_right_shift_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - 
binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto bitwise_right_shift_result_type_pyapi = - [&](const py::dtype &dtype1, const py::dtype &dtype2) { - return py_binary_ufunc_result_type( - dtype1, dtype2, bitwise_right_shift_output_id_table); - }; - m.def("_bitwise_right_shift", bitwise_right_shift_pyapi, "", - py::arg("src1"), py::arg("src2"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_bitwise_right_shift_result_type", - bitwise_right_shift_result_type_pyapi, ""); - - using impl::bitwise_right_shift_inplace_contig_dispatch_table; - using impl::bitwise_right_shift_inplace_strided_dispatch_table; - - auto bitwise_right_shift_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, - bitwise_right_shift_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - bitwise_right_shift_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - bitwise_right_shift_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_bitwise_right_shift_inplace", bitwise_right_shift_inplace_pyapi, - "", py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // B07: ===== BITWISE_XOR (x1, x2) - { - impl::populate_bitwise_xor_dispatch_tables(); - using impl::bitwise_xor_contig_dispatch_table; - using impl::bitwise_xor_output_id_table; - using impl::bitwise_xor_strided_dispatch_table; - - auto bitwise_xor_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray 
&src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, bitwise_xor_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - bitwise_xor_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - bitwise_xor_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto bitwise_xor_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - bitwise_xor_output_id_table); - }; - m.def("_bitwise_xor", bitwise_xor_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_bitwise_xor_result_type", bitwise_xor_result_type_pyapi, ""); - - using impl::bitwise_xor_inplace_contig_dispatch_table; - using impl::bitwise_xor_inplace_strided_dispatch_table; - - auto bitwise_xor_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, bitwise_xor_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - bitwise_xor_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - bitwise_xor_inplace_strided_dispatch_table, - // function pointers to handle 
inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_bitwise_xor_inplace", bitwise_xor_inplace_pyapi, "", - py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } -} - -} // namespace py_internal -} // namespace tensor -} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions3.cpp b/dpctl/tensor/libtensor/source/elementwise_functions3.cpp deleted file mode 100644 index 6c1ed91506..0000000000 --- a/dpctl/tensor/libtensor/source/elementwise_functions3.cpp +++ /dev/null @@ -1,1407 +0,0 @@ -//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// -// -// Data Parallel Control (dpctl) -// -// Copyright 2020-2023 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file defines functions of dpctl.tensor._tensor_impl extensions, -/// specifically functions for elementwise operations. 
-//===----------------------------------------------------------------------===// - -#include "dpctl4pybind11.hpp" -#include -#include -#include -#include -#include - -#include "elementwise_functions.hpp" -#include "elementwise_functions3.hpp" -#include "utils/type_dispatch.hpp" - -#include "kernels/elementwise_functions/not_equal.hpp" -#include "kernels/elementwise_functions/positive.hpp" -#include "kernels/elementwise_functions/pow.hpp" -#include "kernels/elementwise_functions/proj.hpp" -#include "kernels/elementwise_functions/real.hpp" -#include "kernels/elementwise_functions/remainder.hpp" -#include "kernels/elementwise_functions/round.hpp" -#include "kernels/elementwise_functions/rsqrt.hpp" -#include "kernels/elementwise_functions/sign.hpp" -#include "kernels/elementwise_functions/signbit.hpp" -#include "kernels/elementwise_functions/sin.hpp" -#include "kernels/elementwise_functions/sinh.hpp" -#include "kernels/elementwise_functions/sqrt.hpp" -#include "kernels/elementwise_functions/square.hpp" -#include "kernels/elementwise_functions/subtract.hpp" -#include "kernels/elementwise_functions/tan.hpp" -#include "kernels/elementwise_functions/tanh.hpp" -#include "kernels/elementwise_functions/trunc.hpp" - -namespace dpctl -{ -namespace tensor -{ -namespace py_internal -{ - -namespace td_ns = dpctl::tensor::type_dispatch; - -namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; -using ew_cmn_ns::binary_contig_impl_fn_ptr_t; -using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; -using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; -using ew_cmn_ns::binary_strided_impl_fn_ptr_t; -using ew_cmn_ns::unary_contig_impl_fn_ptr_t; -using ew_cmn_ns::unary_strided_impl_fn_ptr_t; - -using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; -using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; -using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; - -// B20: ==== NOT_EQUAL (x1, x2) -namespace impl -{ -namespace 
not_equal_fn_ns = dpctl::tensor::kernels::not_equal; - -static binary_contig_impl_fn_ptr_t - not_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int not_equal_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - not_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_not_equal_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = not_equal_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::NotEqualTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(not_equal_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::NotEqualStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(not_equal_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::NotEqualContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(not_equal_contig_dispatch_table); -}; -} // namespace impl - -// U26: ==== POSITIVE (x) -namespace impl -{ - -namespace positive_fn_ns = dpctl::tensor::kernels::positive; - -static unary_contig_impl_fn_ptr_t - positive_contig_dispatch_vector[td_ns::num_types]; -static int positive_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - positive_strided_dispatch_vector[td_ns::num_types]; - -void populate_positive_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = positive_fn_ns; - - using fn_ns::PositiveContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(positive_contig_dispatch_vector); - - using fn_ns::PositiveStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(positive_strided_dispatch_vector); - - using fn_ns::PositiveTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(positive_output_typeid_vector); -} - -} // namespace impl 
- -// B21: ==== POW (x1, x2) -namespace impl -{ - -namespace pow_fn_ns = dpctl::tensor::kernels::pow; - -static binary_contig_impl_fn_ptr_t pow_contig_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static int pow_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - pow_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - pow_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - pow_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_pow_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = pow_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::PowTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(pow_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::PowStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(pow_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::PowContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(pow_contig_dispatch_table); - - // function pointers for inplace operation on general strided arrays - using fn_ns::PowInplaceStridedFactory; - DispatchTableBuilder - dtb4; - dtb4.populate_dispatch_table(pow_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::PowInplaceContigFactory; - DispatchTableBuilder - dtb5; - dtb5.populate_dispatch_table(pow_inplace_contig_dispatch_table); -}; - -} // namespace impl - -// U??: ==== PROJ (x) -namespace impl -{ - -namespace proj_fn_ns = dpctl::tensor::kernels::proj; - -static unary_contig_impl_fn_ptr_t proj_contig_dispatch_vector[td_ns::num_types]; -static int proj_output_typeid_vector[td_ns::num_types]; 
-static unary_strided_impl_fn_ptr_t - proj_strided_dispatch_vector[td_ns::num_types]; - -void populate_proj_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = proj_fn_ns; - - using fn_ns::ProjContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(proj_contig_dispatch_vector); - - using fn_ns::ProjStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(proj_strided_dispatch_vector); - - using fn_ns::ProjTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(proj_output_typeid_vector); -} -} // namespace impl - -// U27: ==== REAL (x) -namespace impl -{ - -namespace real_fn_ns = dpctl::tensor::kernels::real; - -static unary_contig_impl_fn_ptr_t real_contig_dispatch_vector[td_ns::num_types]; -static int real_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - real_strided_dispatch_vector[td_ns::num_types]; - -void populate_real_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = real_fn_ns; - - using fn_ns::RealContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(real_contig_dispatch_vector); - - using fn_ns::RealStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(real_strided_dispatch_vector); - - using fn_ns::RealTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(real_output_typeid_vector); -} -} // namespace impl - -// B22: ==== REMAINDER (x1, x2) -namespace impl -{ - -namespace remainder_fn_ns = dpctl::tensor::kernels::remainder; - -static binary_contig_impl_fn_ptr_t - remainder_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int remainder_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - remainder_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - remainder_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static 
binary_inplace_strided_impl_fn_ptr_t - remainder_inplace_strided_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -void populate_remainder_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = remainder_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::RemainderTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(remainder_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::RemainderStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(remainder_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::RemainderContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(remainder_contig_dispatch_table); - - // function pointers for inplace operation on general strided arrays - using fn_ns::RemainderInplaceStridedFactory; - DispatchTableBuilder - dtb4; - dtb4.populate_dispatch_table(remainder_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::RemainderInplaceContigFactory; - DispatchTableBuilder - dtb5; - dtb5.populate_dispatch_table(remainder_inplace_contig_dispatch_table); -} - -} // namespace impl - -// U28: ==== ROUND (x) -namespace impl -{ - -namespace round_fn_ns = dpctl::tensor::kernels::round; - -static unary_contig_impl_fn_ptr_t - round_contig_dispatch_vector[td_ns::num_types]; -static int round_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - round_strided_dispatch_vector[td_ns::num_types]; - -void populate_round_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = round_fn_ns; - - using fn_ns::RoundContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(round_contig_dispatch_vector); - - using fn_ns::RoundStridedFactory; - DispatchVectorBuilder - dvb2; - 
dvb2.populate_dispatch_vector(round_strided_dispatch_vector); - - using fn_ns::RoundTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(round_output_typeid_vector); -} - -} // namespace impl - -// U29: ==== SIGN (x) -namespace impl -{ - -namespace sign_fn_ns = dpctl::tensor::kernels::sign; - -static unary_contig_impl_fn_ptr_t sign_contig_dispatch_vector[td_ns::num_types]; -static int sign_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - sign_strided_dispatch_vector[td_ns::num_types]; - -void populate_sign_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = sign_fn_ns; - - using fn_ns::SignContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(sign_contig_dispatch_vector); - - using fn_ns::SignStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(sign_strided_dispatch_vector); - - using fn_ns::SignTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(sign_output_typeid_vector); -} - -} // namespace impl - -// ==== SIGNBIT (x) -namespace impl -{ - -namespace signbit_fn_ns = dpctl::tensor::kernels::signbit; - -static unary_contig_impl_fn_ptr_t - signbit_contig_dispatch_vector[td_ns::num_types]; -static int signbit_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - signbit_strided_dispatch_vector[td_ns::num_types]; - -void populate_signbit_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = signbit_fn_ns; - - using fn_ns::SignbitContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(signbit_contig_dispatch_vector); - - using fn_ns::SignbitStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(signbit_strided_dispatch_vector); - - using fn_ns::SignbitTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(signbit_output_typeid_vector); -} - -} // namespace impl - -// U30: ==== SIN (x) -namespace impl -{ - 
-namespace sin_fn_ns = dpctl::tensor::kernels::sin; - -static unary_contig_impl_fn_ptr_t sin_contig_dispatch_vector[td_ns::num_types]; -static int sin_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - sin_strided_dispatch_vector[td_ns::num_types]; - -void populate_sin_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = sin_fn_ns; - - using fn_ns::SinContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(sin_contig_dispatch_vector); - - using fn_ns::SinStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(sin_strided_dispatch_vector); - - using fn_ns::SinTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(sin_output_typeid_vector); -} - -} // namespace impl - -// U31: ==== SINH (x) -namespace impl -{ - -namespace sinh_fn_ns = dpctl::tensor::kernels::sinh; - -static unary_contig_impl_fn_ptr_t sinh_contig_dispatch_vector[td_ns::num_types]; -static int sinh_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - sinh_strided_dispatch_vector[td_ns::num_types]; - -void populate_sinh_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = sinh_fn_ns; - - using fn_ns::SinhContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(sinh_contig_dispatch_vector); - - using fn_ns::SinhStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(sinh_strided_dispatch_vector); - - using fn_ns::SinhTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(sinh_output_typeid_vector); -} - -} // namespace impl - -// U32: ==== SQUARE (x) -namespace impl -{ - -namespace square_fn_ns = dpctl::tensor::kernels::square; - -static unary_contig_impl_fn_ptr_t - square_contig_dispatch_vector[td_ns::num_types]; -static int square_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - square_strided_dispatch_vector[td_ns::num_types]; - -void 
populate_square_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = square_fn_ns; - - using fn_ns::SquareContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(square_contig_dispatch_vector); - - using fn_ns::SquareStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(square_strided_dispatch_vector); - - using fn_ns::SquareTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(square_output_typeid_vector); -} - -} // namespace impl - -// U33: ==== SQRT (x) -namespace impl -{ - -namespace sqrt_fn_ns = dpctl::tensor::kernels::sqrt; - -static unary_contig_impl_fn_ptr_t sqrt_contig_dispatch_vector[td_ns::num_types]; -static int sqrt_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - sqrt_strided_dispatch_vector[td_ns::num_types]; - -void populate_sqrt_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = sqrt_fn_ns; - - using fn_ns::SqrtContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(sqrt_contig_dispatch_vector); - - using fn_ns::SqrtStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(sqrt_strided_dispatch_vector); - - using fn_ns::SqrtTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(sqrt_output_typeid_vector); -} - -} // namespace impl - -// B23: ==== SUBTRACT (x1, x2) -namespace impl -{ -namespace subtract_fn_ns = dpctl::tensor::kernels::subtract; - -static binary_contig_impl_fn_ptr_t - subtract_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int subtract_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - subtract_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -// sub(matrix, row) -static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t - subtract_contig_matrix_contig_row_broadcast_dispatch_table - [td_ns::num_types][td_ns::num_types]; - -// sub(row, matrix) 
-static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t - subtract_contig_row_contig_matrix_broadcast_dispatch_table - [td_ns::num_types][td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - subtract_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - subtract_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; -static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t - subtract_inplace_row_matrix_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -void populate_subtract_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = subtract_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::SubtractTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(subtract_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::SubtractStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(subtract_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::SubtractContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(subtract_contig_dispatch_table); - - // function pointers for operation on contiguous matrix, contiguous row - // with contiguous matrix output - using fn_ns::SubtractContigMatrixContigRowBroadcastFactory; - DispatchTableBuilder< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, - SubtractContigMatrixContigRowBroadcastFactory, num_types> - dtb4; - dtb4.populate_dispatch_table( - subtract_contig_matrix_contig_row_broadcast_dispatch_table); - - // function pointers for operation on contiguous row, contiguous matrix - // with contiguous matrix output - using fn_ns::SubtractContigRowContigMatrixBroadcastFactory; - DispatchTableBuilder< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, - SubtractContigRowContigMatrixBroadcastFactory, 
num_types> - dtb5; - dtb5.populate_dispatch_table( - subtract_contig_row_contig_matrix_broadcast_dispatch_table); - - // function pointers for inplace operation on general strided arrays - using fn_ns::SubtractInplaceStridedFactory; - DispatchTableBuilder - dtb6; - dtb6.populate_dispatch_table(subtract_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::SubtractInplaceContigFactory; - DispatchTableBuilder - dtb7; - dtb7.populate_dispatch_table(subtract_inplace_contig_dispatch_table); - - // function pointers for inplace operation on contiguous matrix - // and contiguous row - using fn_ns::SubtractInplaceRowMatrixBroadcastFactory; - DispatchTableBuilder - dtb8; - dtb8.populate_dispatch_table(subtract_inplace_row_matrix_dispatch_table); -}; - -} // namespace impl - -// U34: ==== TAN (x) -namespace impl -{ - -namespace tan_fn_ns = dpctl::tensor::kernels::tan; - -static unary_contig_impl_fn_ptr_t tan_contig_dispatch_vector[td_ns::num_types]; -static int tan_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - tan_strided_dispatch_vector[td_ns::num_types]; - -void populate_tan_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = tan_fn_ns; - - using fn_ns::TanContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(tan_contig_dispatch_vector); - - using fn_ns::TanStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(tan_strided_dispatch_vector); - - using fn_ns::TanTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(tan_output_typeid_vector); -} - -} // namespace impl - -// U35: ==== TANH (x) -namespace impl -{ - -namespace tanh_fn_ns = dpctl::tensor::kernels::tanh; - -static unary_contig_impl_fn_ptr_t tanh_contig_dispatch_vector[td_ns::num_types]; -static int tanh_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - 
tanh_strided_dispatch_vector[td_ns::num_types]; - -void populate_tanh_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = tanh_fn_ns; - - using fn_ns::TanhContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(tanh_contig_dispatch_vector); - - using fn_ns::TanhStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(tanh_strided_dispatch_vector); - - using fn_ns::TanhTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(tanh_output_typeid_vector); -} - -} // namespace impl - -// U36: ==== TRUNC (x) -namespace impl -{ - -namespace trunc_fn_ns = dpctl::tensor::kernels::trunc; - -static unary_contig_impl_fn_ptr_t - trunc_contig_dispatch_vector[td_ns::num_types]; -static int trunc_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - trunc_strided_dispatch_vector[td_ns::num_types]; - -void populate_trunc_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = trunc_fn_ns; - - using fn_ns::TruncContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(trunc_contig_dispatch_vector); - - using fn_ns::TruncStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(trunc_strided_dispatch_vector); - - using fn_ns::TruncTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(trunc_output_typeid_vector); -} - -} // namespace impl - -// U39: ==== RSQRT (x) -namespace impl -{ - -namespace rsqrt_fn_ns = dpctl::tensor::kernels::rsqrt; - -static unary_contig_impl_fn_ptr_t - rsqrt_contig_dispatch_vector[td_ns::num_types]; -static int rsqrt_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - rsqrt_strided_dispatch_vector[td_ns::num_types]; - -void populate_rsqrt_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = rsqrt_fn_ns; - - using fn_ns::RsqrtContigFactory; - DispatchVectorBuilder - dvb1; - 
dvb1.populate_dispatch_vector(rsqrt_contig_dispatch_vector); - - using fn_ns::RsqrtStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(rsqrt_strided_dispatch_vector); - - using fn_ns::RsqrtTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(rsqrt_output_typeid_vector); -} - -} // namespace impl - -// ========================================================================================== -// // - -namespace py = pybind11; - -void init_elementwise_functions3(py::module_ m) -{ - - using arrayT = dpctl::tensor::usm_ndarray; - using event_vecT = std::vector; - - // B20: ==== NOT_EQUAL (x1, x2) - { - impl::populate_not_equal_dispatch_tables(); - using impl::not_equal_contig_dispatch_table; - using impl::not_equal_output_id_table; - using impl::not_equal_strided_dispatch_table; - - auto not_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, not_equal_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - not_equal_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - not_equal_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto not_equal_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - not_equal_output_id_table); - }; - 
m.def("_not_equal", not_equal_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_not_equal_result_type", not_equal_result_type_pyapi, ""); - } - - // U26: ==== POSITIVE (x) - { - impl::populate_positive_dispatch_vectors(); - using impl::positive_contig_dispatch_vector; - using impl::positive_output_typeid_vector; - using impl::positive_strided_dispatch_vector; - - auto positive_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, - positive_output_typeid_vector, - positive_contig_dispatch_vector, - positive_strided_dispatch_vector); - }; - m.def("_positive", positive_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto positive_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - positive_output_typeid_vector); - }; - m.def("_positive_result_type", positive_result_type_pyapi); - } - - // B21: ==== POW (x1, x2) - { - impl::populate_pow_dispatch_tables(); - using impl::pow_contig_dispatch_table; - using impl::pow_output_id_table; - using impl::pow_strided_dispatch_table; - - auto pow_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, pow_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - pow_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - pow_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - 
binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto pow_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - pow_output_id_table); - }; - m.def("_pow", pow_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_pow_result_type", pow_result_type_pyapi, ""); - - using impl::pow_inplace_contig_dispatch_table; - using impl::pow_inplace_strided_dispatch_table; - - auto pow_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, pow_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - pow_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - pow_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_pow_inplace", pow_inplace_pyapi, "", py::arg("lhs"), - py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // U??: ==== PROJ (x) - { - impl::populate_proj_dispatch_vectors(); - using impl::proj_contig_dispatch_vector; - using impl::proj_output_typeid_vector; - using impl::proj_strided_dispatch_vector; - - auto proj_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, 
depends, proj_output_typeid_vector, - proj_contig_dispatch_vector, proj_strided_dispatch_vector); - }; - m.def("_proj", proj_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto proj_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, proj_output_typeid_vector); - }; - m.def("_proj_result_type", proj_result_type_pyapi); - } - - // U27: ==== REAL (x) - { - impl::populate_real_dispatch_vectors(); - using impl::real_contig_dispatch_vector; - using impl::real_output_typeid_vector; - using impl::real_strided_dispatch_vector; - - auto real_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, real_output_typeid_vector, - real_contig_dispatch_vector, real_strided_dispatch_vector); - }; - m.def("_real", real_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto real_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, real_output_typeid_vector); - }; - m.def("_real_result_type", real_result_type_pyapi); - } - - // B22: ==== REMAINDER (x1, x2) - { - impl::populate_remainder_dispatch_tables(); - using impl::remainder_contig_dispatch_table; - using impl::remainder_output_id_table; - using impl::remainder_strided_dispatch_table; - - auto remainder_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, remainder_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - remainder_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - remainder_strided_dispatch_table, - 
// function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto remainder_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - remainder_output_id_table); - }; - m.def("_remainder", remainder_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_remainder_result_type", remainder_result_type_pyapi, ""); - - using impl::remainder_inplace_contig_dispatch_table; - using impl::remainder_inplace_strided_dispatch_table; - - auto remainder_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, remainder_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - remainder_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - remainder_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_remainder_inplace", remainder_inplace_pyapi, "", py::arg("lhs"), - py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // U28: ==== ROUND (x) - { - impl::populate_round_dispatch_vectors(); - using impl::round_contig_dispatch_vector; - using 
impl::round_output_typeid_vector; - using impl::round_strided_dispatch_vector; - - auto round_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, round_output_typeid_vector, - round_contig_dispatch_vector, round_strided_dispatch_vector); - }; - m.def("_round", round_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto round_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - round_output_typeid_vector); - }; - m.def("_round_result_type", round_result_type_pyapi); - } - - // U29: ==== SIGN (x) - { - impl::populate_sign_dispatch_vectors(); - using impl::sign_contig_dispatch_vector; - using impl::sign_output_typeid_vector; - using impl::sign_strided_dispatch_vector; - - auto sign_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, sign_output_typeid_vector, - sign_contig_dispatch_vector, sign_strided_dispatch_vector); - }; - m.def("_sign", sign_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto sign_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, sign_output_typeid_vector); - }; - m.def("_sign_result_type", sign_result_type_pyapi); - } - - // ==== SIGNBIT (x) - { - impl::populate_signbit_dispatch_vectors(); - using impl::signbit_contig_dispatch_vector; - using impl::signbit_output_typeid_vector; - using impl::signbit_strided_dispatch_vector; - - auto signbit_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, - signbit_output_typeid_vector, - signbit_contig_dispatch_vector, - signbit_strided_dispatch_vector); - }; - m.def("_signbit", 
signbit_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto signbit_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - signbit_output_typeid_vector); - }; - m.def("_signbit_result_type", signbit_result_type_pyapi); - } - - // U30: ==== SIN (x) - { - impl::populate_sin_dispatch_vectors(); - using impl::sin_contig_dispatch_vector; - using impl::sin_output_typeid_vector; - using impl::sin_strided_dispatch_vector; - - auto sin_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, sin_output_typeid_vector, - sin_contig_dispatch_vector, sin_strided_dispatch_vector); - }; - m.def("_sin", sin_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto sin_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, sin_output_typeid_vector); - }; - m.def("_sin_result_type", sin_result_type_pyapi); - } - // U31: ==== SINH (x) - { - impl::populate_sinh_dispatch_vectors(); - using impl::sinh_contig_dispatch_vector; - using impl::sinh_output_typeid_vector; - using impl::sinh_strided_dispatch_vector; - - auto sinh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, sinh_output_typeid_vector, - sinh_contig_dispatch_vector, sinh_strided_dispatch_vector); - }; - m.def("_sinh", sinh_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto sinh_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, sinh_output_typeid_vector); - }; - m.def("_sinh_result_type", sinh_result_type_pyapi); - } - - // U32: ==== SQUARE (x) - { - impl::populate_square_dispatch_vectors(); - using 
impl::square_contig_dispatch_vector; - using impl::square_output_typeid_vector; - using impl::square_strided_dispatch_vector; - - auto square_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, square_output_typeid_vector, - square_contig_dispatch_vector, square_strided_dispatch_vector); - }; - m.def("_square", square_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto square_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - square_output_typeid_vector); - }; - m.def("_square_result_type", square_result_type_pyapi); - } - - // U33: ==== SQRT (x) - { - impl::populate_sqrt_dispatch_vectors(); - using impl::sqrt_contig_dispatch_vector; - using impl::sqrt_output_typeid_vector; - using impl::sqrt_strided_dispatch_vector; - - auto sqrt_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, sqrt_output_typeid_vector, - sqrt_contig_dispatch_vector, sqrt_strided_dispatch_vector); - }; - m.def("_sqrt", sqrt_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto sqrt_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, sqrt_output_typeid_vector); - }; - m.def("_sqrt_result_type", sqrt_result_type_pyapi); - } - - // B23: ==== SUBTRACT (x1, x2) - { - impl::populate_subtract_dispatch_tables(); - using impl::subtract_contig_dispatch_table; - using impl::subtract_contig_matrix_contig_row_broadcast_dispatch_table; - using impl::subtract_contig_row_contig_matrix_broadcast_dispatch_table; - using impl::subtract_output_id_table; - using impl::subtract_strided_dispatch_table; - - auto subtract_pyapi = - [&](const dpctl::tensor::usm_ndarray &src1, - const 
dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, subtract_output_id_table, - // function pointers to handle operation on contiguous - // arrays (pointers may be nullptr) - subtract_contig_dispatch_table, - // function pointers to handle operation on strided arrays - // (most general case) - subtract_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) - subtract_contig_matrix_contig_row_broadcast_dispatch_table, - // function pointers to handle operation of c-contig matrix - // and c-contig row with broadcasting (may be nullptr) - subtract_contig_row_contig_matrix_broadcast_dispatch_table); - }; - auto subtract_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - subtract_output_id_table); - }; - m.def("_subtract", subtract_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_subtract_result_type", subtract_result_type_pyapi, ""); - - using impl::subtract_inplace_contig_dispatch_table; - using impl::subtract_inplace_row_matrix_dispatch_table; - using impl::subtract_inplace_strided_dispatch_table; - - auto subtract_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, subtract_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - subtract_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - subtract_inplace_strided_dispatch_table, - // function pointers to handle inplace 
operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - subtract_inplace_row_matrix_dispatch_table); - }; - m.def("_subtract_inplace", subtract_inplace_pyapi, "", py::arg("lhs"), - py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // U34: ==== TAN (x) - { - impl::populate_tan_dispatch_vectors(); - using impl::tan_contig_dispatch_vector; - using impl::tan_output_typeid_vector; - using impl::tan_strided_dispatch_vector; - - auto tan_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, tan_output_typeid_vector, - tan_contig_dispatch_vector, tan_strided_dispatch_vector); - }; - m.def("_tan", tan_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto tan_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, tan_output_typeid_vector); - }; - m.def("_tan_result_type", tan_result_type_pyapi); - } - - // U35: ==== TANH (x) - { - impl::populate_tanh_dispatch_vectors(); - using impl::tanh_contig_dispatch_vector; - using impl::tanh_output_typeid_vector; - using impl::tanh_strided_dispatch_vector; - - auto tanh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, tanh_output_typeid_vector, - tanh_contig_dispatch_vector, tanh_strided_dispatch_vector); - }; - m.def("_tanh", tanh_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto tanh_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, tanh_output_typeid_vector); - }; - m.def("_tanh_result_type", tanh_result_type_pyapi); - } - - // U36: ==== TRUNC (x) - { - impl::populate_trunc_dispatch_vectors(); - using impl::trunc_contig_dispatch_vector; - using 
impl::trunc_output_typeid_vector; - using impl::trunc_strided_dispatch_vector; - - auto trunc_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, trunc_output_typeid_vector, - trunc_contig_dispatch_vector, trunc_strided_dispatch_vector); - }; - m.def("_trunc", trunc_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto trunc_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - trunc_output_typeid_vector); - }; - m.def("_trunc_result_type", trunc_result_type_pyapi); - } - - // U39: ==== RSQRT (x) - { - impl::populate_rsqrt_dispatch_vectors(); - using impl::rsqrt_contig_dispatch_vector; - using impl::rsqrt_output_typeid_vector; - using impl::rsqrt_strided_dispatch_vector; - - auto rsqrt_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, rsqrt_output_typeid_vector, - rsqrt_contig_dispatch_vector, rsqrt_strided_dispatch_vector); - }; - m.def("_rsqrt", rsqrt_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto rsqrt_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - rsqrt_output_typeid_vector); - }; - m.def("_rsqrt_result_type", rsqrt_result_type_pyapi); - } -} - -} // namespace py_internal -} // namespace tensor -} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions4.cpp b/dpctl/tensor/libtensor/source/elementwise_functions4.cpp deleted file mode 100644 index dabaa1cb39..0000000000 --- a/dpctl/tensor/libtensor/source/elementwise_functions4.cpp +++ /dev/null @@ -1,1551 +0,0 @@ -//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// -// -// Data Parallel Control (dpctl) -// -// Copyright 2020-2023 
Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file defines functions of dpctl.tensor._tensor_impl extensions, -/// specifically functions for elementwise operations. -//===----------------------------------------------------------------------===// - -#include "dpctl4pybind11.hpp" -#include -#include -#include -#include -#include - -#include "elementwise_functions.hpp" -#include "elementwise_functions4.hpp" -#include "utils/type_dispatch.hpp" - -#include "kernels/elementwise_functions/cbrt.hpp" -#include "kernels/elementwise_functions/ceil.hpp" -#include "kernels/elementwise_functions/conj.hpp" -#include "kernels/elementwise_functions/copysign.hpp" -#include "kernels/elementwise_functions/cos.hpp" -#include "kernels/elementwise_functions/cosh.hpp" -#include "kernels/elementwise_functions/equal.hpp" -#include "kernels/elementwise_functions/exp.hpp" -#include "kernels/elementwise_functions/exp2.hpp" -#include "kernels/elementwise_functions/expm1.hpp" -#include "kernels/elementwise_functions/floor.hpp" -#include "kernels/elementwise_functions/floor_divide.hpp" -#include "kernels/elementwise_functions/greater.hpp" -#include "kernels/elementwise_functions/greater_equal.hpp" -#include "kernels/elementwise_functions/hypot.hpp" -#include "kernels/elementwise_functions/imag.hpp" -#include "kernels/elementwise_functions/isfinite.hpp" 
-#include "kernels/elementwise_functions/isinf.hpp" -#include "kernels/elementwise_functions/isnan.hpp" -#include "kernels/elementwise_functions/true_divide.hpp" - -namespace dpctl -{ -namespace tensor -{ -namespace py_internal -{ - -namespace td_ns = dpctl::tensor::type_dispatch; - -namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; -using ew_cmn_ns::binary_contig_impl_fn_ptr_t; -using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; -using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; -using ew_cmn_ns::binary_strided_impl_fn_ptr_t; -using ew_cmn_ns::unary_contig_impl_fn_ptr_t; -using ew_cmn_ns::unary_strided_impl_fn_ptr_t; - -using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; -using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; -using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; - -// U09: ==== CEIL (x) -namespace impl -{ - -namespace ceil_fn_ns = dpctl::tensor::kernels::ceil; - -static unary_contig_impl_fn_ptr_t ceil_contig_dispatch_vector[td_ns::num_types]; -static int ceil_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - ceil_strided_dispatch_vector[td_ns::num_types]; - -void populate_ceil_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = ceil_fn_ns; - - using fn_ns::CeilContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(ceil_contig_dispatch_vector); - - using fn_ns::CeilStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(ceil_strided_dispatch_vector); - - using fn_ns::CeilTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(ceil_output_typeid_vector); -} - -} // namespace impl - -// U10: ==== CONJ (x) -namespace impl -{ - -namespace conj_fn_ns = dpctl::tensor::kernels::conj; - -static unary_contig_impl_fn_ptr_t conj_contig_dispatch_vector[td_ns::num_types]; -static int conj_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - 
conj_strided_dispatch_vector[td_ns::num_types]; - -void populate_conj_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = conj_fn_ns; - - using fn_ns::ConjContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(conj_contig_dispatch_vector); - - using fn_ns::ConjStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(conj_strided_dispatch_vector); - - using fn_ns::ConjTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(conj_output_typeid_vector); -} -} // namespace impl - -// U11: ==== COS (x) -namespace impl -{ - -namespace cos_fn_ns = dpctl::tensor::kernels::cos; - -static unary_contig_impl_fn_ptr_t cos_contig_dispatch_vector[td_ns::num_types]; -static int cos_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - cos_strided_dispatch_vector[td_ns::num_types]; - -void populate_cos_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = cos_fn_ns; - - using fn_ns::CosContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(cos_contig_dispatch_vector); - - using fn_ns::CosStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(cos_strided_dispatch_vector); - - using fn_ns::CosTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(cos_output_typeid_vector); -} - -} // namespace impl - -// U12: ==== COSH (x) -namespace impl -{ - -namespace cosh_fn_ns = dpctl::tensor::kernels::cosh; - -static unary_contig_impl_fn_ptr_t cosh_contig_dispatch_vector[td_ns::num_types]; -static int cosh_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - cosh_strided_dispatch_vector[td_ns::num_types]; - -void populate_cosh_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = cosh_fn_ns; - - using fn_ns::CoshContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(cosh_contig_dispatch_vector); - - using fn_ns::CoshStridedFactory; - 
DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(cosh_strided_dispatch_vector); - - using fn_ns::CoshTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(cosh_output_typeid_vector); -} - -} // namespace impl - -// B08: ==== DIVIDE (x1, x2) -namespace impl -{ -namespace true_divide_fn_ns = dpctl::tensor::kernels::true_divide; - -static binary_contig_impl_fn_ptr_t - true_divide_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int true_divide_output_id_table[td_ns::num_types][td_ns::num_types]; -static int true_divide_inplace_output_id_table[td_ns::num_types] - [td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - true_divide_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -// divide(matrix, row) -static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t - true_divide_contig_matrix_contig_row_broadcast_dispatch_table - [td_ns::num_types][td_ns::num_types]; - -// divide(row, matrix) -static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t - true_divide_contig_row_contig_matrix_broadcast_dispatch_table - [td_ns::num_types][td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - true_divide_inplace_contig_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - true_divide_inplace_strided_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t - true_divide_inplace_row_matrix_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -void populate_true_divide_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = true_divide_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::TrueDivideTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(true_divide_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::TrueDivideStridedFactory; - 
DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(true_divide_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::TrueDivideContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(true_divide_contig_dispatch_table); - - // function pointers for operation on contiguous matrix, contiguous row - // with contiguous matrix output - using fn_ns::TrueDivideContigMatrixContigRowBroadcastFactory; - DispatchTableBuilder< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, - TrueDivideContigMatrixContigRowBroadcastFactory, num_types> - dtb4; - dtb4.populate_dispatch_table( - true_divide_contig_matrix_contig_row_broadcast_dispatch_table); - - // function pointers for operation on contiguous row, contiguous matrix - // with contiguous matrix output - using fn_ns::TrueDivideContigRowContigMatrixBroadcastFactory; - DispatchTableBuilder< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, - TrueDivideContigRowContigMatrixBroadcastFactory, num_types> - dtb5; - dtb5.populate_dispatch_table( - true_divide_contig_row_contig_matrix_broadcast_dispatch_table); - - // which input types are supported, and what is the type of the result - using fn_ns::TrueDivideInplaceTypeMapFactory; - DispatchTableBuilder dtb6; - dtb6.populate_dispatch_table(true_divide_inplace_output_id_table); - - // function pointers for inplace operation on general strided arrays - using fn_ns::TrueDivideInplaceStridedFactory; - DispatchTableBuilder - dtb7; - dtb7.populate_dispatch_table(true_divide_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::TrueDivideInplaceContigFactory; - DispatchTableBuilder - dtb8; - dtb8.populate_dispatch_table(true_divide_inplace_contig_dispatch_table); - - // function pointers for inplace operation on contiguous matrix - // and contiguous row - using fn_ns::TrueDivideInplaceRowMatrixBroadcastFactory; - 
DispatchTableBuilder - dtb9; - dtb9.populate_dispatch_table(true_divide_inplace_row_matrix_dispatch_table); -}; - -} // namespace impl - -// B09: ==== EQUAL (x1, x2) -namespace impl -{ -namespace equal_fn_ns = dpctl::tensor::kernels::equal; - -static binary_contig_impl_fn_ptr_t - equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int equal_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_equal_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = equal_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::EqualTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(equal_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::EqualStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(equal_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::EqualContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(equal_contig_dispatch_table); -}; -} // namespace impl - -// U13: ==== EXP (x) -namespace impl -{ - -namespace exp_fn_ns = dpctl::tensor::kernels::exp; - -static unary_contig_impl_fn_ptr_t exp_contig_dispatch_vector[td_ns::num_types]; -static int exp_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - exp_strided_dispatch_vector[td_ns::num_types]; - -void populate_exp_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = exp_fn_ns; - - using fn_ns::ExpContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(exp_contig_dispatch_vector); - - using fn_ns::ExpStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(exp_strided_dispatch_vector); - - using fn_ns::ExpTypeMapFactory; - DispatchVectorBuilder dvb3; - 
dvb3.populate_dispatch_vector(exp_output_typeid_vector); -} - -} // namespace impl - -// U14: ==== EXPM1 (x) -namespace impl -{ - -namespace expm1_fn_ns = dpctl::tensor::kernels::expm1; - -static unary_contig_impl_fn_ptr_t - expm1_contig_dispatch_vector[td_ns::num_types]; -static int expm1_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - expm1_strided_dispatch_vector[td_ns::num_types]; - -void populate_expm1_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = expm1_fn_ns; - - using fn_ns::Expm1ContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(expm1_contig_dispatch_vector); - - using fn_ns::Expm1StridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(expm1_strided_dispatch_vector); - - using fn_ns::Expm1TypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(expm1_output_typeid_vector); -} - -} // namespace impl - -// U15: ==== FLOOR (x) -namespace impl -{ - -namespace floor_fn_ns = dpctl::tensor::kernels::floor; - -static unary_contig_impl_fn_ptr_t - floor_contig_dispatch_vector[td_ns::num_types]; -static int floor_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - floor_strided_dispatch_vector[td_ns::num_types]; - -void populate_floor_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = floor_fn_ns; - - using fn_ns::FloorContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(floor_contig_dispatch_vector); - - using fn_ns::FloorStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(floor_strided_dispatch_vector); - - using fn_ns::FloorTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(floor_output_typeid_vector); -} - -} // namespace impl - -// B10: ==== FLOOR_DIVIDE (x1, x2) -namespace impl -{ -namespace floor_divide_fn_ns = dpctl::tensor::kernels::floor_divide; - -static binary_contig_impl_fn_ptr_t - 
floor_divide_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int floor_divide_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - floor_divide_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -static binary_inplace_contig_impl_fn_ptr_t - floor_divide_inplace_contig_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static binary_inplace_strided_impl_fn_ptr_t - floor_divide_inplace_strided_dispatch_table[td_ns::num_types] - [td_ns::num_types]; - -void populate_floor_divide_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = floor_divide_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::FloorDivideTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(floor_divide_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::FloorDivideStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(floor_divide_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::FloorDivideContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(floor_divide_contig_dispatch_table); - - // function pointers for inplace operation on general strided arrays - using fn_ns::FloorDivideInplaceStridedFactory; - DispatchTableBuilder - dtb4; - dtb4.populate_dispatch_table(floor_divide_inplace_strided_dispatch_table); - - // function pointers for inplace operation on contiguous inputs and output - using fn_ns::FloorDivideInplaceContigFactory; - DispatchTableBuilder - dtb5; - dtb5.populate_dispatch_table(floor_divide_inplace_contig_dispatch_table); -}; - -} // namespace impl - -// B11: ==== GREATER (x1, x2) -namespace impl -{ -namespace greater_fn_ns = dpctl::tensor::kernels::greater; - -static binary_contig_impl_fn_ptr_t - greater_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int 
greater_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - greater_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_greater_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = greater_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::GreaterTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(greater_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::GreaterStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(greater_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::GreaterContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(greater_contig_dispatch_table); -}; -} // namespace impl - -// B12: ==== GREATER_EQUAL (x1, x2) -namespace impl -{ -namespace greater_equal_fn_ns = dpctl::tensor::kernels::greater_equal; - -static binary_contig_impl_fn_ptr_t - greater_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int greater_equal_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - greater_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void populate_greater_equal_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = greater_equal_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::GreaterEqualTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(greater_equal_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::GreaterEqualStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(greater_equal_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::GreaterEqualContigFactory; - 
DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(greater_equal_contig_dispatch_table); -}; -} // namespace impl - -// U16: ==== IMAG (x) -namespace impl -{ - -namespace imag_fn_ns = dpctl::tensor::kernels::imag; - -static unary_contig_impl_fn_ptr_t imag_contig_dispatch_vector[td_ns::num_types]; -static int imag_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - imag_strided_dispatch_vector[td_ns::num_types]; - -void populate_imag_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = imag_fn_ns; - - using fn_ns::ImagContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(imag_contig_dispatch_vector); - - using fn_ns::ImagStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(imag_strided_dispatch_vector); - - using fn_ns::ImagTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(imag_output_typeid_vector); -} -} // namespace impl - -// U17: ==== ISFINITE (x) -namespace impl -{ -namespace isfinite_fn_ns = dpctl::tensor::kernels::isfinite; - -static unary_contig_impl_fn_ptr_t - isfinite_contig_dispatch_vector[td_ns::num_types]; -static int isfinite_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - isfinite_strided_dispatch_vector[td_ns::num_types]; - -void populate_isfinite_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = isfinite_fn_ns; - - using fn_ns::IsFiniteContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(isfinite_contig_dispatch_vector); - - using fn_ns::IsFiniteStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(isfinite_strided_dispatch_vector); - - using fn_ns::IsFiniteTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(isfinite_output_typeid_vector); -} - -} // namespace impl - -// U18: ==== ISINF (x) -namespace impl -{ -namespace isinf_fn_ns = dpctl::tensor::kernels::isinf; - -static 
unary_contig_impl_fn_ptr_t - isinf_contig_dispatch_vector[td_ns::num_types]; -static int isinf_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - isinf_strided_dispatch_vector[td_ns::num_types]; - -void populate_isinf_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = isinf_fn_ns; - - using fn_ns::IsInfContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(isinf_contig_dispatch_vector); - - using fn_ns::IsInfStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(isinf_strided_dispatch_vector); - - using fn_ns::IsInfTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(isinf_output_typeid_vector); -} - -} // namespace impl - -// U19: ==== ISNAN (x) -namespace impl -{ -namespace isnan_fn_ns = dpctl::tensor::kernels::isnan; - -static unary_contig_impl_fn_ptr_t - isnan_contig_dispatch_vector[td_ns::num_types]; -static int isnan_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - isnan_strided_dispatch_vector[td_ns::num_types]; - -void populate_isnan_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = isnan_fn_ns; - - using fn_ns::IsNanContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(isnan_contig_dispatch_vector); - - using fn_ns::IsNanStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(isnan_strided_dispatch_vector); - - using fn_ns::IsNanTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(isnan_output_typeid_vector); -} - -} // namespace impl - -// B24: ==== HYPOT (x1, x2) -namespace impl -{ -namespace hypot_fn_ns = dpctl::tensor::kernels::hypot; - -static binary_contig_impl_fn_ptr_t - hypot_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int hypot_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - hypot_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; 
- -void populate_hypot_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = hypot_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::HypotTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(hypot_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::HypotStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(hypot_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::HypotContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(hypot_contig_dispatch_table); -}; - -} // namespace impl - -// U37: ==== CBRT (x) -namespace impl -{ - -namespace cbrt_fn_ns = dpctl::tensor::kernels::cbrt; - -static unary_contig_impl_fn_ptr_t cbrt_contig_dispatch_vector[td_ns::num_types]; -static int cbrt_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - cbrt_strided_dispatch_vector[td_ns::num_types]; - -void populate_cbrt_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = cbrt_fn_ns; - - using fn_ns::CbrtContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(cbrt_contig_dispatch_vector); - - using fn_ns::CbrtStridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(cbrt_strided_dispatch_vector); - - using fn_ns::CbrtTypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(cbrt_output_typeid_vector); -} - -} // namespace impl - -// B24: ==== COPYSIGN (x1, x2) -namespace impl -{ -namespace copysign_fn_ns = dpctl::tensor::kernels::copysign; - -static binary_contig_impl_fn_ptr_t - copysign_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; -static int copysign_output_id_table[td_ns::num_types][td_ns::num_types]; - -static binary_strided_impl_fn_ptr_t - copysign_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; - -void 
populate_copysign_dispatch_tables(void) -{ - using namespace td_ns; - namespace fn_ns = copysign_fn_ns; - - // which input types are supported, and what is the type of the result - using fn_ns::CopysignTypeMapFactory; - DispatchTableBuilder dtb1; - dtb1.populate_dispatch_table(copysign_output_id_table); - - // function pointers for operation on general strided arrays - using fn_ns::CopysignStridedFactory; - DispatchTableBuilder - dtb2; - dtb2.populate_dispatch_table(copysign_strided_dispatch_table); - - // function pointers for operation on contiguous inputs and output - using fn_ns::CopysignContigFactory; - DispatchTableBuilder - dtb3; - dtb3.populate_dispatch_table(copysign_contig_dispatch_table); -}; - -} // namespace impl - -// U38: ==== EXP2 (x) -namespace impl -{ - -namespace exp2_fn_ns = dpctl::tensor::kernels::exp2; - -static unary_contig_impl_fn_ptr_t exp2_contig_dispatch_vector[td_ns::num_types]; -static int exp2_output_typeid_vector[td_ns::num_types]; -static unary_strided_impl_fn_ptr_t - exp2_strided_dispatch_vector[td_ns::num_types]; - -void populate_exp2_dispatch_vectors(void) -{ - using namespace td_ns; - namespace fn_ns = exp2_fn_ns; - - using fn_ns::Exp2ContigFactory; - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(exp2_contig_dispatch_vector); - - using fn_ns::Exp2StridedFactory; - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(exp2_strided_dispatch_vector); - - using fn_ns::Exp2TypeMapFactory; - DispatchVectorBuilder dvb3; - dvb3.populate_dispatch_vector(exp2_output_typeid_vector); -} - -} // namespace impl - -// ========================================================================================== -// // - -namespace py = pybind11; - -void init_elementwise_functions4(py::module_ m) -{ - - using arrayT = dpctl::tensor::usm_ndarray; - using event_vecT = std::vector; - - // U09: ==== CEIL (x) - { - impl::populate_ceil_dispatch_vectors(); - using impl::ceil_contig_dispatch_vector; - using 
impl::ceil_output_typeid_vector; - using impl::ceil_strided_dispatch_vector; - - auto ceil_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, ceil_output_typeid_vector, - ceil_contig_dispatch_vector, ceil_strided_dispatch_vector); - }; - m.def("_ceil", ceil_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto ceil_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, ceil_output_typeid_vector); - }; - m.def("_ceil_result_type", ceil_result_type_pyapi); - } - - // U10: ==== CONJ (x) - { - impl::populate_conj_dispatch_vectors(); - using impl::conj_contig_dispatch_vector; - using impl::conj_output_typeid_vector; - using impl::conj_strided_dispatch_vector; - - auto conj_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, conj_output_typeid_vector, - conj_contig_dispatch_vector, conj_strided_dispatch_vector); - }; - m.def("_conj", conj_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto conj_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, conj_output_typeid_vector); - }; - m.def("_conj_result_type", conj_result_type_pyapi); - } - - // U11: ==== COS (x) - { - impl::populate_cos_dispatch_vectors(); - using impl::cos_contig_dispatch_vector; - using impl::cos_output_typeid_vector; - using impl::cos_strided_dispatch_vector; - - auto cos_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, cos_output_typeid_vector, - cos_contig_dispatch_vector, cos_strided_dispatch_vector); - }; - m.def("_cos", cos_pyapi, "", py::arg("src"), py::arg("dst"), - 
py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto cos_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, cos_output_typeid_vector); - }; - m.def("_cos_result_type", cos_result_type_pyapi); - } - - // U12: ==== COSH (x) - { - impl::populate_cosh_dispatch_vectors(); - using impl::cosh_contig_dispatch_vector; - using impl::cosh_output_typeid_vector; - using impl::cosh_strided_dispatch_vector; - - auto cosh_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, cosh_output_typeid_vector, - cosh_contig_dispatch_vector, cosh_strided_dispatch_vector); - }; - m.def("_cosh", cosh_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto cosh_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, cosh_output_typeid_vector); - }; - m.def("_cosh_result_type", cosh_result_type_pyapi); - } - - // B08: ==== DIVIDE (x1, x2) - { - impl::populate_true_divide_dispatch_tables(); - using impl::true_divide_contig_dispatch_table; - using impl:: - true_divide_contig_matrix_contig_row_broadcast_dispatch_table; - using impl:: - true_divide_contig_row_contig_matrix_broadcast_dispatch_table; - using impl::true_divide_output_id_table; - using impl::true_divide_strided_dispatch_table; - - auto divide_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, true_divide_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - true_divide_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - true_divide_strided_dispatch_table, - // 
function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - true_divide_contig_matrix_contig_row_broadcast_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - true_divide_contig_row_contig_matrix_broadcast_dispatch_table); - }; - auto divide_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - true_divide_output_id_table); - }; - m.def("_divide", divide_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_divide_result_type", divide_result_type_pyapi, ""); - - using impl::true_divide_inplace_contig_dispatch_table; - using impl::true_divide_inplace_output_id_table; - using impl::true_divide_inplace_row_matrix_dispatch_table; - using impl::true_divide_inplace_strided_dispatch_table; - - auto divide_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, - true_divide_inplace_output_id_table, - // function pointers to handle inplace operation on - // contiguous arrays (pointers may be nullptr) - true_divide_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - true_divide_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - true_divide_inplace_row_matrix_dispatch_table); - }; - m.def("_divide_inplace", divide_inplace_pyapi, "", py::arg("lhs"), - py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // B09: ==== EQUAL (x1, x2) - { - impl::populate_equal_dispatch_tables(); - using 
impl::equal_contig_dispatch_table; - using impl::equal_output_id_table; - using impl::equal_strided_dispatch_table; - - auto equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, equal_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - equal_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - equal_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto equal_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - equal_output_id_table); - }; - m.def("_equal", equal_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_equal_result_type", equal_result_type_pyapi, ""); - } - - // U13: ==== EXP (x) - { - impl::populate_exp_dispatch_vectors(); - using impl::exp_contig_dispatch_vector; - using impl::exp_output_typeid_vector; - using impl::exp_strided_dispatch_vector; - - auto exp_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, exp_output_typeid_vector, - exp_contig_dispatch_vector, exp_strided_dispatch_vector); - }; - m.def("_exp", exp_pyapi, "", py::arg("src"), py::arg("dst"), - 
py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto exp_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, exp_output_typeid_vector); - }; - m.def("_exp_result_type", exp_result_type_pyapi); - } - - // U14: ==== EXPM1 (x) - { - impl::populate_expm1_dispatch_vectors(); - using impl::expm1_contig_dispatch_vector; - using impl::expm1_output_typeid_vector; - using impl::expm1_strided_dispatch_vector; - - auto expm1_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, expm1_output_typeid_vector, - expm1_contig_dispatch_vector, expm1_strided_dispatch_vector); - }; - m.def("_expm1", expm1_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto expm1_result_type_pyapi = [&](const py::dtype dtype) { - return py_unary_ufunc_result_type(dtype, - expm1_output_typeid_vector); - }; - m.def("_expm1_result_type", expm1_result_type_pyapi); - } - - // U15: ==== FLOOR (x) - { - impl::populate_floor_dispatch_vectors(); - using impl::floor_contig_dispatch_vector; - using impl::floor_output_typeid_vector; - using impl::floor_strided_dispatch_vector; - - auto floor_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, floor_output_typeid_vector, - floor_contig_dispatch_vector, floor_strided_dispatch_vector); - }; - m.def("_floor", floor_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto floor_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - floor_output_typeid_vector); - }; - m.def("_floor_result_type", floor_result_type_pyapi); - } - - // B10: ==== FLOOR_DIVIDE (x1, x2) - { - impl::populate_floor_divide_dispatch_tables(); - using 
impl::floor_divide_contig_dispatch_table; - using impl::floor_divide_output_id_table; - using impl::floor_divide_strided_dispatch_table; - - auto floor_divide_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, floor_divide_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - floor_divide_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - floor_divide_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto floor_divide_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - floor_divide_output_id_table); - }; - m.def("_floor_divide", floor_divide_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_floor_divide_result_type", floor_divide_result_type_pyapi, ""); - - using impl::floor_divide_inplace_contig_dispatch_table; - using impl::floor_divide_inplace_strided_dispatch_table; - - auto floor_divide_inplace_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_inplace_ufunc( - src, dst, exec_q, depends, floor_divide_output_id_table, - // function pointers to handle inplace operation on - // 
contiguous arrays (pointers may be nullptr) - floor_divide_inplace_contig_dispatch_table, - // function pointers to handle inplace operation on strided - // arrays (most general case) - floor_divide_inplace_strided_dispatch_table, - // function pointers to handle inplace operation on - // c-contig matrix with c-contig row with broadcasting - // (may be nullptr) - td_ns::NullPtrTable< - binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); - }; - m.def("_floor_divide_inplace", floor_divide_inplace_pyapi, "", - py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - } - - // B11: ==== GREATER (x1, x2) - { - impl::populate_greater_dispatch_tables(); - using impl::greater_contig_dispatch_table; - using impl::greater_output_id_table; - using impl::greater_strided_dispatch_table; - - auto greater_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, greater_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - greater_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - greater_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto greater_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - greater_output_id_table); - }; - m.def("_greater", greater_pyapi, 
"", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_greater_result_type", greater_result_type_pyapi, ""); - } - - // B12: ==== GREATER_EQUAL (x1, x2) - { - impl::populate_greater_equal_dispatch_tables(); - using impl::greater_equal_contig_dispatch_table; - using impl::greater_equal_output_id_table; - using impl::greater_equal_strided_dispatch_table; - - auto greater_equal_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, greater_equal_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - greater_equal_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - greater_equal_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto greater_equal_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - greater_equal_output_id_table); - }; - m.def("_greater_equal", greater_equal_pyapi, "", py::arg("src1"), - py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_greater_equal_result_type", greater_equal_result_type_pyapi, - ""); - } - - // U16: ==== IMAG (x) - { - impl::populate_imag_dispatch_vectors(); - using impl::imag_contig_dispatch_vector; - using 
impl::imag_output_typeid_vector; - using impl::imag_strided_dispatch_vector; - - auto imag_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, imag_output_typeid_vector, - imag_contig_dispatch_vector, imag_strided_dispatch_vector); - }; - m.def("_imag", imag_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto imag_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, imag_output_typeid_vector); - }; - m.def("_imag_result_type", imag_result_type_pyapi); - } - - // U17: ==== ISFINITE (x) - { - impl::populate_isfinite_dispatch_vectors(); - - using impl::isfinite_contig_dispatch_vector; - using impl::isfinite_output_typeid_vector; - using impl::isfinite_strided_dispatch_vector; - auto isfinite_pyapi = - [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_unary_ufunc(src, dst, exec_q, depends, - isfinite_output_typeid_vector, - isfinite_contig_dispatch_vector, - isfinite_strided_dispatch_vector); - }; - auto isfinite_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - isfinite_output_typeid_vector); - }; - m.def("_isfinite", isfinite_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_isfinite_result_type", isfinite_result_type_pyapi, ""); - } - - // U18: ==== ISINF (x) - { - impl::populate_isinf_dispatch_vectors(); - - using impl::isinf_contig_dispatch_vector; - using impl::isinf_output_typeid_vector; - using impl::isinf_strided_dispatch_vector; - auto isinf_pyapi = [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, 
isinf_output_typeid_vector, - isinf_contig_dispatch_vector, isinf_strided_dispatch_vector); - }; - auto isinf_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - isinf_output_typeid_vector); - }; - m.def("_isinf", isinf_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_isinf_result_type", isinf_result_type_pyapi, ""); - } - - // U19: ==== ISNAN (x) - { - impl::populate_isnan_dispatch_vectors(); - - using impl::isnan_contig_dispatch_vector; - using impl::isnan_output_typeid_vector; - using impl::isnan_strided_dispatch_vector; - auto isnan_pyapi = [&](const dpctl::tensor::usm_ndarray &src, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, isnan_output_typeid_vector, - isnan_contig_dispatch_vector, isnan_strided_dispatch_vector); - }; - auto isnan_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, - isnan_output_typeid_vector); - }; - m.def("_isnan", isnan_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - m.def("_isnan_result_type", isnan_result_type_pyapi, ""); - } - - // B24: ==== HYPOT (x1, x2) - { - impl::populate_hypot_dispatch_tables(); - using impl::hypot_contig_dispatch_table; - using impl::hypot_output_id_table; - using impl::hypot_strided_dispatch_table; - - auto hypot_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends = {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, hypot_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - hypot_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - 
hypot_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto hypot_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - hypot_output_id_table); - }; - m.def("_hypot", hypot_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_hypot_result_type", hypot_result_type_pyapi, ""); - } - - // U37: ==== CBRT (x) - { - impl::populate_cbrt_dispatch_vectors(); - using impl::cbrt_contig_dispatch_vector; - using impl::cbrt_output_typeid_vector; - using impl::cbrt_strided_dispatch_vector; - - auto cbrt_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, cbrt_output_typeid_vector, - cbrt_contig_dispatch_vector, cbrt_strided_dispatch_vector); - }; - m.def("_cbrt", cbrt_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto cbrt_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, cbrt_output_typeid_vector); - }; - m.def("_cbrt_result_type", cbrt_result_type_pyapi); - } - - // B25: ==== COPYSIGN (x1, x2) - { - impl::populate_copysign_dispatch_tables(); - using impl::copysign_contig_dispatch_table; - using impl::copysign_output_id_table; - using impl::copysign_strided_dispatch_table; - - auto copysign_pyapi = [&](const dpctl::tensor::usm_ndarray &src1, - const dpctl::tensor::usm_ndarray &src2, - const dpctl::tensor::usm_ndarray &dst, - 
sycl::queue &exec_q, - const std::vector &depends = - {}) { - return py_binary_ufunc( - src1, src2, dst, exec_q, depends, copysign_output_id_table, - // function pointers to handle operation on contiguous arrays - // (pointers may be nullptr) - copysign_contig_dispatch_table, - // function pointers to handle operation on strided arrays (most - // general case) - copysign_strided_dispatch_table, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, - // function pointers to handle operation of c-contig matrix and - // c-contig row with broadcasting (may be nullptr) - td_ns::NullPtrTable< - binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); - }; - auto copysign_result_type_pyapi = [&](const py::dtype &dtype1, - const py::dtype &dtype2) { - return py_binary_ufunc_result_type(dtype1, dtype2, - copysign_output_id_table); - }; - m.def("_copysign", copysign_pyapi, "", py::arg("src1"), py::arg("src2"), - py::arg("dst"), py::arg("sycl_queue"), - py::arg("depends") = py::list()); - m.def("_copysign_result_type", copysign_result_type_pyapi, ""); - } - - // U38: ==== EXP2 (x) - { - impl::populate_exp2_dispatch_vectors(); - using impl::exp2_contig_dispatch_vector; - using impl::exp2_output_typeid_vector; - using impl::exp2_strided_dispatch_vector; - - auto exp2_pyapi = [&](const arrayT &src, const arrayT &dst, - sycl::queue &exec_q, - const event_vecT &depends = {}) { - return py_unary_ufunc( - src, dst, exec_q, depends, exp2_output_typeid_vector, - exp2_contig_dispatch_vector, exp2_strided_dispatch_vector); - }; - m.def("_exp2", exp2_pyapi, "", py::arg("src"), py::arg("dst"), - py::arg("sycl_queue"), py::arg("depends") = py::list()); - - auto exp2_result_type_pyapi = [&](const py::dtype &dtype) { - return py_unary_ufunc_result_type(dtype, exp2_output_typeid_vector); - }; - m.def("_exp2_result_type", 
exp2_result_type_pyapi); - } -} - -} // namespace py_internal -} // namespace tensor -} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp index 3ffd6216b9..282aecc95d 100644 --- a/dpctl/tensor/libtensor/source/tensor_py.cpp +++ b/dpctl/tensor/libtensor/source/tensor_py.cpp @@ -42,10 +42,7 @@ #include "copy_for_roll.hpp" #include "copy_numpy_ndarray_into_usm_ndarray.hpp" #include "device_support_queries.hpp" -#include "elementwise_functions1.hpp" -#include "elementwise_functions2.hpp" -#include "elementwise_functions3.hpp" -#include "elementwise_functions4.hpp" +#include "elementwise_functions/elementwise_common.hpp" #include "eye_ctor.hpp" #include "full_ctor.hpp" #include "integer_advanced_indexing.hpp" @@ -444,10 +441,7 @@ PYBIND11_MODULE(_tensor_impl, m) py::arg("reps"), py::arg("axis"), py::arg("sycl_queue"), py::arg("depends") = py::list()); - dpctl::tensor::py_internal::init_elementwise_functions1(m); - dpctl::tensor::py_internal::init_elementwise_functions2(m); - dpctl::tensor::py_internal::init_elementwise_functions3(m); - dpctl::tensor::py_internal::init_elementwise_functions4(m); + dpctl::tensor::py_internal::init_elementwise_functions(m); dpctl::tensor::py_internal::init_boolean_reduction_functions(m); dpctl::tensor::py_internal::init_reduction_functions(m); } From e5b306dda7796b4c884471bbd3d7a8981e05e2a6 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 23 Oct 2023 22:13:54 -0700 Subject: [PATCH 13/19] Corrected numbers of elementwise functions --- dpctl/tensor/_elementwise_funcs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py index 906f37ee10..aa5ba04b19 100644 --- a/dpctl/tensor/_elementwise_funcs.py +++ b/dpctl/tensor/_elementwise_funcs.py @@ -1183,7 +1183,7 @@ _logical_xor_docstring_, ) -# B??: ==== MAXIMUM (x1, x2) +# B26: ==== MAXIMUM (x1, x2) _maximum_docstring_ = """ 
maximum(x1, x2, out=None, order='K') @@ -1213,7 +1213,7 @@ _maximum_docstring_, ) -# B??: ==== MINIMUM (x1, x2) +# B27: ==== MINIMUM (x1, x2) _minimum_docstring_ = """ minimum(x1, x2, out=None, order='K') @@ -1373,7 +1373,7 @@ binary_inplace_fn=ti._pow_inplace, ) -# U??: ==== PROJ (x) +# U40: ==== PROJ (x) _proj_docstring = """ proj(x, out=None, order='K') @@ -1514,7 +1514,7 @@ "sign", ti._sign_result_type, ti._sign, _sign_docstring ) -# ==== SIGNBIT (x) +# U41: ==== SIGNBIT (x) _signbit_docstring = """ signbit(x, out=None, order='K') From 534f190fd003033dbf632b854c996be6505ec43b Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 23 Oct 2023 22:14:16 -0700 Subject: [PATCH 14/19] Added missing vector include to elementwise function source files Removed utility include --- dpctl/tensor/libtensor/source/elementwise_functions/abs.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/acos.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/acosh.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/add.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/asin.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/asinh.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/atan.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/atan2.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/atanh.cpp | 2 +- .../libtensor/source/elementwise_functions/bitwise_and.cpp | 2 +- .../libtensor/source/elementwise_functions/bitwise_invert.cpp | 2 +- .../source/elementwise_functions/bitwise_left_shift.cpp | 2 +- .../libtensor/source/elementwise_functions/bitwise_or.cpp | 2 +- .../source/elementwise_functions/bitwise_right_shift.cpp | 2 +- .../libtensor/source/elementwise_functions/bitwise_xor.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/cbrt.cpp | 4 ++-- dpctl/tensor/libtensor/source/elementwise_functions/ceil.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/conj.cpp | 2 
+- .../libtensor/source/elementwise_functions/copysign.cpp | 4 ++-- dpctl/tensor/libtensor/source/elementwise_functions/cos.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/cosh.cpp | 2 +- .../source/elementwise_functions/elementwise_functions.hpp | 1 - dpctl/tensor/libtensor/source/elementwise_functions/equal.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/exp.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/exp2.cpp | 4 ++-- dpctl/tensor/libtensor/source/elementwise_functions/expm1.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/floor.cpp | 2 +- .../libtensor/source/elementwise_functions/floor_divide.cpp | 2 +- .../tensor/libtensor/source/elementwise_functions/greater.cpp | 2 +- .../libtensor/source/elementwise_functions/greater_equal.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/hypot.cpp | 4 ++-- dpctl/tensor/libtensor/source/elementwise_functions/imag.cpp | 2 +- .../libtensor/source/elementwise_functions/isfinite.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/isinf.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/isnan.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/less.cpp | 2 +- .../libtensor/source/elementwise_functions/less_equal.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/log.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/log10.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/log1p.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/log2.cpp | 2 +- .../libtensor/source/elementwise_functions/logaddexp.cpp | 2 +- .../libtensor/source/elementwise_functions/logical_and.cpp | 2 +- .../libtensor/source/elementwise_functions/logical_not.cpp | 2 +- .../libtensor/source/elementwise_functions/logical_or.cpp | 2 +- .../libtensor/source/elementwise_functions/logical_xor.cpp | 2 +- .../tensor/libtensor/source/elementwise_functions/maximum.cpp | 4 ++-- 
.../tensor/libtensor/source/elementwise_functions/minimum.cpp | 4 ++-- .../libtensor/source/elementwise_functions/multiply.cpp | 2 +- .../libtensor/source/elementwise_functions/negative.cpp | 2 +- .../libtensor/source/elementwise_functions/not_equal.cpp | 2 +- .../libtensor/source/elementwise_functions/positive.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/pow.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/proj.cpp | 4 ++-- dpctl/tensor/libtensor/source/elementwise_functions/real.cpp | 2 +- .../libtensor/source/elementwise_functions/remainder.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/round.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/rsqrt.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/sign.cpp | 2 +- .../tensor/libtensor/source/elementwise_functions/signbit.cpp | 4 ++-- dpctl/tensor/libtensor/source/elementwise_functions/sin.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/sinh.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/sqrt.cpp | 2 +- .../tensor/libtensor/source/elementwise_functions/square.cpp | 2 +- .../libtensor/source/elementwise_functions/subtract.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/tan.cpp | 4 ++-- dpctl/tensor/libtensor/source/elementwise_functions/tanh.cpp | 4 ++-- .../libtensor/source/elementwise_functions/true_divide.cpp | 2 +- dpctl/tensor/libtensor/source/elementwise_functions/trunc.cpp | 4 ++-- 69 files changed, 79 insertions(+), 80 deletions(-) diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/abs.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/abs.cpp index e359ace261..4b3e8b635b 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/abs.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/abs.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "abs.hpp" #include "elementwise_functions.hpp" diff --git 
a/dpctl/tensor/libtensor/source/elementwise_functions/acos.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/acos.cpp index da4827be05..011cc052fb 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/acos.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/acos.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "acos.hpp" #include "elementwise_functions.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/acosh.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/acosh.cpp index 0ca029e604..526bd44f12 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/acosh.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/acosh.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "acosh.hpp" #include "elementwise_functions.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/add.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/add.cpp index bd0818385f..247b8e0283 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/add.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/add.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "add.hpp" #include "elementwise_functions.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/asin.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/asin.cpp index eb253cb747..14ef5e2665 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/asin.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/asin.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "asin.hpp" #include "elementwise_functions.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/asinh.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/asinh.cpp index 6c89e3f592..dd0b4e62f7 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/asinh.cpp +++ 
b/dpctl/tensor/libtensor/source/elementwise_functions/asinh.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "asinh.hpp" #include "elementwise_functions.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/atan.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/atan.cpp index 03d813813f..81ff00c46a 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/atan.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/atan.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "atan.hpp" #include "elementwise_functions.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/atan2.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/atan2.cpp index 07a9ec17c3..d12a4ff540 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/atan2.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/atan2.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "atan2.hpp" #include "elementwise_functions.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/atanh.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/atanh.cpp index 83b350b607..c42769b8d0 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/atanh.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/atanh.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "atanh.hpp" #include "elementwise_functions.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_and.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_and.cpp index c125adb840..f86f5112cd 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_and.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_and.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "bitwise_and.hpp" #include "elementwise_functions.hpp" diff --git 
a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_invert.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_invert.cpp index 445a123a87..29a04cff38 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_invert.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_invert.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "bitwise_invert.hpp" #include "elementwise_functions.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_left_shift.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_left_shift.cpp index 5ba54c66c8..7969bc4ffa 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_left_shift.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_left_shift.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "bitwise_left_shift.hpp" #include "elementwise_functions.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_or.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_or.cpp index 5de3bfe013..33a57f907c 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_or.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_or.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "bitwise_or.hpp" #include "elementwise_functions.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_right_shift.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_right_shift.cpp index e1285f79ac..3847204b1f 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_right_shift.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_right_shift.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "bitwise_right_shift.hpp" #include "elementwise_functions.hpp" diff --git 
a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_xor.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_xor.cpp index 18e60900cc..71d606766f 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_xor.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/bitwise_xor.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "bitwise_xor.hpp" #include "elementwise_functions.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/cbrt.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/cbrt.cpp index c526788496..b42f234c0d 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/cbrt.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/cbrt.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "cbrt.hpp" #include "elementwise_functions.hpp" @@ -52,7 +52,7 @@ namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; using ew_cmn_ns::unary_contig_impl_fn_ptr_t; using ew_cmn_ns::unary_strided_impl_fn_ptr_t; -// U??: ==== CBRT (x) +// U37: ==== CBRT (x) namespace impl { diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/ceil.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/ceil.cpp index b4a0c90156..f1bb362c5b 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/ceil.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/ceil.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "ceil.hpp" #include "elementwise_functions.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/conj.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/conj.cpp index baec06fcb0..cac84e63fb 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/conj.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/conj.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "conj.hpp" #include "elementwise_functions.hpp" diff --git 
a/dpctl/tensor/libtensor/source/elementwise_functions/copysign.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/copysign.cpp index 0983356d56..6a887e0345 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/copysign.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/copysign.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "copysign.hpp" #include "elementwise_functions.hpp" @@ -54,7 +54,7 @@ using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; using ew_cmn_ns::binary_strided_impl_fn_ptr_t; -// B??: ===== COPYSIGN (x1, x2) +// B25: ===== COPYSIGN (x1, x2) namespace impl { namespace copysign_fn_ns = dpctl::tensor::kernels::copysign; diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/cos.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/cos.cpp index 86d36a35c5..1986610510 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/cos.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/cos.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "cos.hpp" #include "elementwise_functions.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/cosh.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/cosh.cpp index 643415abcb..0bb74df979 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/cosh.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/cosh.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "cosh.hpp" #include "elementwise_functions.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp index 3ac74de3c8..225ec91f48 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +++ 
b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp @@ -29,7 +29,6 @@ #include #include #include -#include #include #include "simplify_iteration_space.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/equal.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/equal.cpp index 7af3e15d5c..f36ec1b446 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/equal.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/equal.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "equal.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/exp.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/exp.cpp index 4f550268e1..51ccaaac70 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/exp.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/exp.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "exp.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/exp2.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/exp2.cpp index 8454ce5c77..438ad0800e 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/exp2.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/exp2.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "exp2.hpp" @@ -52,7 +52,7 @@ namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; using ew_cmn_ns::unary_contig_impl_fn_ptr_t; using ew_cmn_ns::unary_strided_impl_fn_ptr_t; -// U??: ==== EXP2 (x) +// U38: ==== EXP2 (x) namespace impl { diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/expm1.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/expm1.cpp index 16a6f5fb62..3b9332c4f1 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/expm1.cpp +++ 
b/dpctl/tensor/libtensor/source/elementwise_functions/expm1.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "expm1.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/floor.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/floor.cpp index 4279f32529..9ccf89f13a 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/floor.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/floor.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "floor.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/floor_divide.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/floor_divide.cpp index b453d5bad0..e75fc56c67 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/floor_divide.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/floor_divide.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "floor_divide.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/greater.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/greater.cpp index 9aee4afe20..f79102df47 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/greater.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/greater.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "greater.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/greater_equal.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/greater_equal.cpp index b1ec735c17..005679c3fb 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/greater_equal.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/greater_equal.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include 
"greater_equal.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/hypot.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/hypot.cpp index 87c3c7a553..2442710198 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/hypot.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/hypot.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "hypot.hpp" @@ -54,7 +54,7 @@ using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; using ew_cmn_ns::binary_strided_impl_fn_ptr_t; -// B??: ===== HYPOT (x1, x2) +// B24: ===== HYPOT (x1, x2) namespace impl { namespace hypot_fn_ns = dpctl::tensor::kernels::hypot; diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/imag.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/imag.cpp index 33c57ebea3..4012b9206f 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/imag.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/imag.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "imag.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/isfinite.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/isfinite.cpp index 000a4a3a15..73a2be4010 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/isfinite.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/isfinite.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "isfinite.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/isinf.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/isinf.cpp index 7235c3cca4..2600fe4f74 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/isinf.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/isinf.cpp @@ -28,7 
+28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "isinf.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/isnan.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/isnan.cpp index 8849ff29fa..b75618c5e0 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/isnan.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/isnan.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "isnan.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/less.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/less.cpp index 48f30538f0..c34122d862 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/less.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/less.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "less.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/less_equal.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/less_equal.cpp index ce1a60d89f..712b30d902 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/less_equal.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/less_equal.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "less_equal.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/log.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/log.cpp index 2062cb8d59..f73b9e2414 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/log.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/log.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "log.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/log10.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/log10.cpp 
index 6e2cf79cc8..566dfcbcf7 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/log10.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/log10.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "log10.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/log1p.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/log1p.cpp index 0219af01de..badb474778 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/log1p.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/log1p.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "log1p.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/log2.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/log2.cpp index 6906977d73..b5a8a39684 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/log2.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/log2.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "log2.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/logaddexp.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/logaddexp.cpp index 21cfc9ce3b..77ded230be 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/logaddexp.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/logaddexp.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "logaddexp.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/logical_and.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/logical_and.cpp index 76a9b82f49..4c573ce508 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/logical_and.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/logical_and.cpp @@ -28,7 +28,7 @@ #include #include #include 
-#include +#include #include "elementwise_functions.hpp" #include "logical_and.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/logical_not.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/logical_not.cpp index 58fa5d45d7..84362cd9ce 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/logical_not.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/logical_not.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "logical_not.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/logical_or.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/logical_or.cpp index a19ce43079..ebf8251b2e 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/logical_or.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/logical_or.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "logical_or.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/logical_xor.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/logical_xor.cpp index 0495b1721f..9488a5615a 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/logical_xor.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/logical_xor.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "logical_xor.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/maximum.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/maximum.cpp index 7bb818c74f..208bdcf47f 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/maximum.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/maximum.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "maximum.hpp" @@ -54,7 +54,7 @@ using 
ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; using ew_cmn_ns::binary_strided_impl_fn_ptr_t; -// B??: ===== MAXIMUM (x1, x2) +// B26: ===== MAXIMUM (x1, x2) namespace impl { namespace maximum_fn_ns = dpctl::tensor::kernels::maximum; diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/minimum.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/minimum.cpp index 10753b981a..dc1a826ac4 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/minimum.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/minimum.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "minimum.hpp" @@ -54,7 +54,7 @@ using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; using ew_cmn_ns::binary_strided_impl_fn_ptr_t; -// B??: ===== MINIMUM (x1, x2) +// B27: ===== MINIMUM (x1, x2) namespace impl { namespace minimum_fn_ns = dpctl::tensor::kernels::minimum; diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/multiply.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/multiply.cpp index 9e8cf9808a..c087abd9ff 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/multiply.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/multiply.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "multiply.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/negative.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/negative.cpp index 0804c54bc3..bc659506d1 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/negative.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/negative.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" 
#include "negative.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/not_equal.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/not_equal.cpp index 77b6534eb2..a7a3e909cb 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/not_equal.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/not_equal.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "not_equal.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/positive.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/positive.cpp index 0f3ce3a385..eaff0794d2 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/positive.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/positive.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "positive.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/pow.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/pow.cpp index da25d861d7..a8ef6cb171 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/pow.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/pow.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "pow.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/proj.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/proj.cpp index f973e2c532..60060084e1 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/proj.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/proj.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "proj.hpp" @@ -52,7 +52,7 @@ namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; using ew_cmn_ns::unary_contig_impl_fn_ptr_t; using ew_cmn_ns::unary_strided_impl_fn_ptr_t; -// U??: ==== PROJ (x) +// U40: ==== PROJ 
(x) namespace impl { diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/real.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/real.cpp index f14dd1b2eb..890a308a4e 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/real.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/real.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "real.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/remainder.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/remainder.cpp index 468bb288c4..3255ea7e7f 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/remainder.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/remainder.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "remainder.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/round.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/round.cpp index 324bd144f6..cce730b899 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/round.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/round.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "round.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/rsqrt.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/rsqrt.cpp index e8282d5f4d..4661fdfa48 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/rsqrt.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/rsqrt.cpp @@ -52,7 +52,7 @@ namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; using ew_cmn_ns::unary_contig_impl_fn_ptr_t; using ew_cmn_ns::unary_strided_impl_fn_ptr_t; -// U??: ==== RSQRT (x) +// U39: ==== RSQRT (x) namespace impl { diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/sign.cpp 
b/dpctl/tensor/libtensor/source/elementwise_functions/sign.cpp index 85aee184fe..7b7c2c22e5 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/sign.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/sign.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "sign.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/signbit.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/signbit.cpp index f24ed907b5..fc101dd64b 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/signbit.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/signbit.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "signbit.hpp" @@ -52,7 +52,7 @@ namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; using ew_cmn_ns::unary_contig_impl_fn_ptr_t; using ew_cmn_ns::unary_strided_impl_fn_ptr_t; -// U??: ==== SIGNBIT (x) +// U41: ==== SIGNBIT (x) namespace impl { diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/sin.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/sin.cpp index 76993251d9..415dc15133 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/sin.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/sin.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "sin.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/sinh.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/sinh.cpp index 9befdfd1a7..d9f92eb8f1 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/sinh.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/sinh.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "sinh.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/sqrt.cpp 
b/dpctl/tensor/libtensor/source/elementwise_functions/sqrt.cpp index 93c22603d5..159d45b51c 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/sqrt.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/sqrt.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "sqrt.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/square.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/square.cpp index 45576a659c..184e09c19c 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/square.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/square.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "square.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/subtract.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/subtract.cpp index 743647f019..9703182e7a 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/subtract.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/subtract.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "subtract.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/tan.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/tan.cpp index c916d4ea9d..2f1fbf55f2 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/tan.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/tan.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "tan.hpp" @@ -52,7 +52,7 @@ namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; using ew_cmn_ns::unary_contig_impl_fn_ptr_t; using ew_cmn_ns::unary_strided_impl_fn_ptr_t; -// U33: ==== TAN (x) +// U34: ==== TAN (x) namespace impl { diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/tanh.cpp 
b/dpctl/tensor/libtensor/source/elementwise_functions/tanh.cpp index 70112e2024..033389e46d 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/tanh.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/tanh.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "tanh.hpp" @@ -52,7 +52,7 @@ namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; using ew_cmn_ns::unary_contig_impl_fn_ptr_t; using ew_cmn_ns::unary_strided_impl_fn_ptr_t; -// U34: ==== TANH (x) +// U35: ==== TANH (x) namespace impl { diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/true_divide.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/true_divide.cpp index e638661511..22ad9bf3cb 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/true_divide.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/true_divide.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "true_divide.hpp" diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/trunc.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/trunc.cpp index 60af6beafa..5b2f451fb0 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/trunc.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/trunc.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include "elementwise_functions.hpp" #include "trunc.hpp" @@ -52,7 +52,7 @@ namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; using ew_cmn_ns::unary_contig_impl_fn_ptr_t; using ew_cmn_ns::unary_strided_impl_fn_ptr_t; -// U35: ==== TRUNC (x) +// U36: ==== TRUNC (x) namespace impl { From be15ecee9ce335ae265471d52ce22bfb1041c82e Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 24 Oct 2023 10:28:59 -0500 Subject: [PATCH 15/19] Remove variable name in function declaration --- .../source/elementwise_functions/elementwise_common.hpp | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.hpp index 899c3faf22..ef9182f9a2 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.hpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.hpp @@ -35,7 +35,7 @@ namespace tensor namespace py_internal { -extern void init_elementwise_functions(py::module_ m); +extern void init_elementwise_functions(py::module_); } // namespace py_internal } // namespace tensor From 69f54be6269cb2a34bf16f88eb206bce5ca10de3 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 24 Oct 2023 10:29:37 -0500 Subject: [PATCH 16/19] No need to import init functions into namespace, since they are defined in it Removed `using dpctl::tensor::py_internal::init_abs`, since this imports `init_abs` into the current namespace from `dpctl::tensor::py_internal`, but this namespace is the current namespace and so the import is a no-op. Also added brief docstring for the common init module. --- .../elementwise_common.cpp | 71 +------------------ 1 file changed, 2 insertions(+), 69 deletions(-) diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp index ecd3c15ae7..751e44ff55 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp @@ -103,143 +103,76 @@ namespace tensor namespace py_internal { +/*!
@brief Add elementwise functions to Python module */ void init_elementwise_functions(py::module_ m) { - using dpctl::tensor::py_internal::init_abs; init_abs(m); - using dpctl::tensor::py_internal::init_acos; init_acos(m); - using dpctl::tensor::py_internal::init_acosh; init_acosh(m); - using dpctl::tensor::py_internal::init_add; init_add(m); - using dpctl::tensor::py_internal::init_asin; init_asin(m); - using dpctl::tensor::py_internal::init_asinh; init_asinh(m); - using dpctl::tensor::py_internal::init_atan; init_atan(m); - using dpctl::tensor::py_internal::init_atan2; init_atan2(m); - using dpctl::tensor::py_internal::init_atanh; init_atanh(m); - using dpctl::tensor::py_internal::init_bitwise_and; init_bitwise_and(m); - using dpctl::tensor::py_internal::init_bitwise_invert; init_bitwise_invert(m); - using dpctl::tensor::py_internal::init_bitwise_left_shift; init_bitwise_left_shift(m); - using dpctl::tensor::py_internal::init_bitwise_or; init_bitwise_or(m); - using dpctl::tensor::py_internal::init_bitwise_right_shift; init_bitwise_right_shift(m); - using dpctl::tensor::py_internal::init_bitwise_xor; init_bitwise_xor(m); - using dpctl::tensor::py_internal::init_cbrt; init_cbrt(m); - using dpctl::tensor::py_internal::init_ceil; init_ceil(m); - using dpctl::tensor::py_internal::init_conj; init_conj(m); - using dpctl::tensor::py_internal::init_copysign; init_copysign(m); - using dpctl::tensor::py_internal::init_cos; init_cos(m); - using dpctl::tensor::py_internal::init_cosh; init_cosh(m); - using dpctl::tensor::py_internal::init_equal; + init_divide(m); init_equal(m); - using dpctl::tensor::py_internal::init_exp; init_exp(m); - using dpctl::tensor::py_internal::init_exp2; init_exp2(m); - using dpctl::tensor::py_internal::init_expm1; init_expm1(m); - using dpctl::tensor::py_internal::init_floor; init_floor(m); - using dpctl::tensor::py_internal::init_floor_divide; init_floor_divide(m); - using dpctl::tensor::py_internal::init_greater; init_greater(m); - using 
dpctl::tensor::py_internal::init_greater_equal; init_greater_equal(m); - using dpctl::tensor::py_internal::init_hypot; init_hypot(m); - using dpctl::tensor::py_internal::init_imag; init_imag(m); - using dpctl::tensor::py_internal::init_isfinite; init_isfinite(m); - using dpctl::tensor::py_internal::init_isinf; init_isinf(m); - using dpctl::tensor::py_internal::init_isnan; init_isnan(m); - using dpctl::tensor::py_internal::init_less; init_less(m); - using dpctl::tensor::py_internal::init_less_equal; init_less_equal(m); - using dpctl::tensor::py_internal::init_log; init_log(m); - using dpctl::tensor::py_internal::init_log10; init_log10(m); - using dpctl::tensor::py_internal::init_log1p; init_log1p(m); - using dpctl::tensor::py_internal::init_log2; init_log2(m); - using dpctl::tensor::py_internal::init_logaddexp; init_logaddexp(m); - using dpctl::tensor::py_internal::init_logical_and; init_logical_and(m); - using dpctl::tensor::py_internal::init_logical_not; init_logical_not(m); - using dpctl::tensor::py_internal::init_logical_or; init_logical_or(m); - using dpctl::tensor::py_internal::init_logical_xor; init_logical_xor(m); - using dpctl::tensor::py_internal::init_maximum; init_maximum(m); - using dpctl::tensor::py_internal::init_minimum; init_minimum(m); - using dpctl::tensor::py_internal::init_multiply; init_multiply(m); - using dpctl::tensor::py_internal::init_negative; init_negative(m); - using dpctl::tensor::py_internal::init_not_equal; init_not_equal(m); - using dpctl::tensor::py_internal::init_positive; init_positive(m); - using dpctl::tensor::py_internal::init_pow; init_pow(m); - using dpctl::tensor::py_internal::init_proj; init_proj(m); - using dpctl::tensor::py_internal::init_real; init_real(m); - using dpctl::tensor::py_internal::init_remainder; init_remainder(m); - using dpctl::tensor::py_internal::init_round; init_round(m); - using dpctl::tensor::py_internal::init_rsqrt; init_rsqrt(m); - using dpctl::tensor::py_internal::init_sign; init_sign(m); - using 
dpctl::tensor::py_internal::init_signbit; init_signbit(m); - using dpctl::tensor::py_internal::init_sin; init_sin(m); - using dpctl::tensor::py_internal::init_sinh; init_sinh(m); - using dpctl::tensor::py_internal::init_sqrt; init_sqrt(m); - using dpctl::tensor::py_internal::init_square; init_square(m); - using dpctl::tensor::py_internal::init_subtract; init_subtract(m); - using dpctl::tensor::py_internal::init_tan; init_tan(m); - using dpctl::tensor::py_internal::init_tanh; init_tanh(m); - using dpctl::tensor::py_internal::init_divide; - init_divide(m); - using dpctl::tensor::py_internal::init_trunc; init_trunc(m); } From 22b04e4a42a53fd5da997a89f8c5439e418d58e4 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 24 Oct 2023 10:33:04 -0500 Subject: [PATCH 17/19] Changed use of "static inline" for utility functions Instead, moved common functions into anonymous namespace as inline, which is the C++ way of expressing that multiple definitions of the same function may exist in different C++ translation units, which the linker unifies.
--- .../elementwise_functions.hpp | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp index 225ec91f48..381345d3d1 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp @@ -39,7 +39,16 @@ namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -static inline py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t) +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +namespace +{ +inline py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t) { switch (dst_typenum_t) { case td_ns::typenum_t::BOOL: @@ -75,7 +84,7 @@ static inline py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t) } } -static inline int _result_typeid(int arg_typeid, const int *fn_output_id) +inline int _result_typeid(int arg_typeid, const int *fn_output_id) { if (arg_typeid < 0 || arg_typeid >= td_ns::num_types) { throw py::value_error("Input typeid " + std::to_string(arg_typeid) + @@ -85,12 +94,7 @@ static inline int _result_typeid(int arg_typeid, const int *fn_output_id) return fn_output_id[arg_typeid]; } -namespace dpctl -{ -namespace tensor -{ -namespace py_internal -{ +} // end of anonymous namespace template Date: Tue, 24 Oct 2023 13:50:48 -0500 Subject: [PATCH 18/19] Moved inline functions into separate translation units Instead of using inline keyword to allow multiple definitions of the same function in different translation units, introduced elementwise_functions_type_utils.cpp that defines these functions and a header file to use in other translation units. This should reduce the binary size of the produced object files and simplify the linker's job reducing the link-time.
--- dpctl/tensor/CMakeLists.txt | 1 + .../elementwise_functions.hpp | 64 ++++------------- .../elementwise_functions_type_utils.cpp | 70 +++++++++++++++++++ .../elementwise_functions_type_utils.hpp | 33 +++++++++ 4 files changed, 116 insertions(+), 52 deletions(-) create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions_type_utils.cpp create mode 100644 dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions_type_utils.hpp diff --git a/dpctl/tensor/CMakeLists.txt b/dpctl/tensor/CMakeLists.txt index 88f78525d4..35ca62198f 100644 --- a/dpctl/tensor/CMakeLists.txt +++ b/dpctl/tensor/CMakeLists.txt @@ -32,6 +32,7 @@ endif() set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/elementwise_common.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/elementwise_functions_type_utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/abs.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/acos.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/acosh.cpp diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp index 381345d3d1..6817a3541c 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp @@ -31,6 +31,7 @@ #include #include +#include "elementwise_functions_type_utils.hpp" #include "simplify_iteration_space.hpp" #include "utils/memory_overlap.hpp" #include "utils/offset_utils.hpp" @@ -46,56 +47,7 @@ namespace tensor namespace py_internal { -namespace -{ -inline py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t) -{ - switch (dst_typenum_t) { - case td_ns::typenum_t::BOOL: - return py::dtype("?"); - case td_ns::typenum_t::INT8: - return py::dtype("i1"); - case 
td_ns::typenum_t::UINT8: - return py::dtype("u1"); - case td_ns::typenum_t::INT16: - return py::dtype("i2"); - case td_ns::typenum_t::UINT16: - return py::dtype("u2"); - case td_ns::typenum_t::INT32: - return py::dtype("i4"); - case td_ns::typenum_t::UINT32: - return py::dtype("u4"); - case td_ns::typenum_t::INT64: - return py::dtype("i8"); - case td_ns::typenum_t::UINT64: - return py::dtype("u8"); - case td_ns::typenum_t::HALF: - return py::dtype("f2"); - case td_ns::typenum_t::FLOAT: - return py::dtype("f4"); - case td_ns::typenum_t::DOUBLE: - return py::dtype("f8"); - case td_ns::typenum_t::CFLOAT: - return py::dtype("c8"); - case td_ns::typenum_t::CDOUBLE: - return py::dtype("c16"); - default: - throw py::value_error("Unrecognized dst_typeid"); - } -} - -inline int _result_typeid(int arg_typeid, const int *fn_output_id) -{ - if (arg_typeid < 0 || arg_typeid >= td_ns::num_types) { - throw py::value_error("Input typeid " + std::to_string(arg_typeid) + - " is outside of expected bounds."); - } - - return fn_output_id[arg_typeid]; -} - -} // end of anonymous namespace - +/*! @brief Template implementing Python API for unary elementwise functions */ template @@ -297,6 +249,8 @@ py_unary_ufunc(const dpctl::tensor::usm_ndarray &src, strided_fn_ev); } +/*! 
@brief Template implementing Python API for querying of type support by + * unary elementwise functions */ template py::object py_unary_ufunc_result_type(const py::dtype &input_dtype, const output_typesT &output_types) @@ -312,6 +266,7 @@ py::object py_unary_ufunc_result_type(const py::dtype &input_dtype, throw py::value_error(e.what()); } + using dpctl::tensor::py_internal::type_utils::_result_typeid; int dst_typeid = _result_typeid(src_typeid, output_types); if (dst_typeid < 0) { @@ -319,8 +274,9 @@ py::object py_unary_ufunc_result_type(const py::dtype &input_dtype, return py::cast(res); } else { - auto dst_typenum_t = static_cast(dst_typeid); + using dpctl::tensor::py_internal::type_utils::_dtype_from_typenum; + auto dst_typenum_t = static_cast(dst_typeid); auto dt = _dtype_from_typenum(dst_typenum_t); return py::cast(dt); @@ -338,6 +294,8 @@ bool isEqual(Container const &c, std::initializer_list const &l) } } // namespace +/*! @brief Template implementing Python API for binary elementwise + * functions */ template py_binary_ufunc( strided_fn_ev); } +/*! 
@brief Type querying for binary elementwise functions */ template py::object py_binary_ufunc_result_type(const py::dtype &input1_dtype, const py::dtype &input2_dtype, @@ -636,8 +595,9 @@ py::object py_binary_ufunc_result_type(const py::dtype &input1_dtype, return py::cast(res); } else { - auto dst_typenum_t = static_cast(dst_typeid); + using dpctl::tensor::py_internal::type_utils::_dtype_from_typenum; + auto dst_typenum_t = static_cast(dst_typeid); auto dt = _dtype_from_typenum(dst_typenum_t); return py::cast(dt); diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions_type_utils.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions_type_utils.cpp new file mode 100644 index 0000000000..d5179f052c --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions_type_utils.cpp @@ -0,0 +1,70 @@ +#include "dpctl4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions_type_utils.hpp" +#include "utils/type_dispatch.hpp" + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ +namespace type_utils +{ + +py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t) +{ + switch (dst_typenum_t) { + case td_ns::typenum_t::BOOL: + return py::dtype("?"); + case td_ns::typenum_t::INT8: + return py::dtype("i1"); + case td_ns::typenum_t::UINT8: + return py::dtype("u1"); + case td_ns::typenum_t::INT16: + return py::dtype("i2"); + case td_ns::typenum_t::UINT16: + return py::dtype("u2"); + case td_ns::typenum_t::INT32: + return py::dtype("i4"); + case td_ns::typenum_t::UINT32: + return py::dtype("u4"); + case td_ns::typenum_t::INT64: + return py::dtype("i8"); + case td_ns::typenum_t::UINT64: + return py::dtype("u8"); + case td_ns::typenum_t::HALF: + return py::dtype("f2"); + case td_ns::typenum_t::FLOAT: + return py::dtype("f4"); + case td_ns::typenum_t::DOUBLE: + return py::dtype("f8"); 
+ case td_ns::typenum_t::CFLOAT: + return py::dtype("c8"); + case td_ns::typenum_t::CDOUBLE: + return py::dtype("c16"); + default: + throw py::value_error("Unrecognized dst_typeid"); + } +} + +int _result_typeid(int arg_typeid, const int *fn_output_id) +{ + if (arg_typeid < 0 || arg_typeid >= td_ns::num_types) { + throw py::value_error("Input typeid " + std::to_string(arg_typeid) + + " is outside of expected bounds."); + } + + return fn_output_id[arg_typeid]; +} + +} // namespace type_utils +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions_type_utils.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions_type_utils.hpp new file mode 100644 index 0000000000..7621de9038 --- /dev/null +++ b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions_type_utils.hpp @@ -0,0 +1,33 @@ +#pragma once + +#pragma once +#include "dpctl4pybind11.hpp" +#include +#include +#include + +#include "utils/type_dispatch.hpp" + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ +namespace type_utils +{ + +/*! @brief Produce dtype from a type number */ +extern py::dtype _dtype_from_typenum(td_ns::typenum_t); + +/*! 
@brief Lookup typeid of the result from typeid of + * argument and the mapping table */ +extern int _result_typeid(int, const int *); + +} // namespace type_utils +} // namespace py_internal +} // namespace tensor +} // namespace dpctl From 8be8545389fa5159ebc219fbdf9e0a608b413fab Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 25 Oct 2023 06:18:24 -0500 Subject: [PATCH 19/19] Added license header for 2 new files --- .../elementwise_functions_type_utils.cpp | 25 +++++++++++++++++++ .../elementwise_functions_type_utils.hpp | 25 ++++++++++++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions_type_utils.cpp b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions_type_utils.cpp index d5179f052c..473048e8fa 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions_type_utils.cpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions_type_utils.cpp @@ -1,3 +1,28 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions for looking of supported types in elementwise +/// functions. 
+//===----------------------------------------------------------------------===// + #include "dpctl4pybind11.hpp" #include #include diff --git a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions_type_utils.hpp b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions_type_utils.hpp index 7621de9038..6dac195dc2 100644 --- a/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions_type_utils.hpp +++ b/dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions_type_utils.hpp @@ -1,4 +1,27 @@ -#pragma once +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file declares functions for looking of supported types in elementwise +/// functions. +//===----------------------------------------------------------------------===// #pragma once #include "dpctl4pybind11.hpp"