Commit 4766173

Resolve COPY_INSTEAD_OF_MOVE issues in libtensor
1 parent 17f6259 commit 4766173

20 files changed: +91 -82 lines changed
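
Background on the pattern, not part of the commit itself: COPY_INSTEAD_OF_MOVE is a static-analysis finding (reported by checkers such as Coverity) raised where a value is copied although the copy is only read, or although the source is never used again. The hunks below apply the usual fixes: bind such values to a const reference, take read-only parameters by const reference, and std::move a value into its final owner. A minimal standalone sketch of the three fixes, using hypothetical names (Record, make_record) rather than dpctl code:

#include <string>
#include <tuple>
#include <utility>

// Hypothetical stand-in for sycl::event / py::object in the hunks below.
struct Record {
    std::string payload;
};

Record make_record() { return Record{"temporary result"}; }

int main()
{
    std::tuple<int, Record> boxed{42, Record{"shape/strides metadata"}};

    // Flagged form: std::get returns a reference, but initializing a
    // by-value local from it copies a value that is only ever read.
    Record copied = std::get<1>(boxed);

    // Fixed form, as in `const sycl::event &copy_shape_ev = std::get<2>(...)`:
    // bind a const reference instead, so no copy is made.
    const Record &viewed = std::get<1>(boxed);

    // Binding a const reference to a returned temporary also works; lifetime
    // extension keeps it alive for the scope of the reference, as with
    // `const auto &ctx = exec_q.get_context();`.
    const Record &from_temp = make_record();

    // When the source is not needed afterwards, transfer ownership instead of
    // copying, mirroring `dependent_ev = std::move(partial_reduction_ev);`.
    Record sink = std::move(copied);

    (void)viewed;
    (void)from_temp;
    (void)sink;
    return 0;
}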

dpctl/apis/include/dpctl4pybind11.hpp
Lines changed: 7 additions & 6 deletions

@@ -30,6 +30,7 @@
 #include <complex>
 #include <memory>
 #include <pybind11/pybind11.h>
+#include <utility>
 #include <vector>

 namespace py = pybind11;
@@ -369,19 +370,19 @@ class dpctl_capi
     sycl::queue q_{};
     PySyclQueueObject *py_q_tmp =
         SyclQueue_Make(reinterpret_cast<DPCTLSyclQueueRef>(&q_));
-    py::object py_sycl_queue = py::reinterpret_steal<py::object>(
+    const py::object &py_sycl_queue = py::reinterpret_steal<py::object>(
         reinterpret_cast<PyObject *>(py_q_tmp));

     default_sycl_queue_ = std::shared_ptr<py::object>(
         new py::object(py_sycl_queue), Deleter{});

     py::module_ mod_memory = py::module_::import("dpctl.memory");
-    py::object py_as_usm_memory = mod_memory.attr("as_usm_memory");
+    const py::object &py_as_usm_memory = mod_memory.attr("as_usm_memory");
     as_usm_memory_ = std::shared_ptr<py::object>(
         new py::object{py_as_usm_memory}, Deleter{});

     auto mem_kl = mod_memory.attr("MemoryUSMHost");
-    py::object py_default_usm_memory =
+    const py::object &py_default_usm_memory =
         mem_kl(1, py::arg("queue") = py_sycl_queue);
     default_usm_memory_ = std::shared_ptr<py::object>(
         new py::object{py_default_usm_memory}, Deleter{});
@@ -390,7 +391,7 @@ class dpctl_capi
         py::module_::import("dpctl.tensor._usmarray");
     auto tensor_kl = mod_usmarray.attr("usm_ndarray");

-    py::object py_default_usm_ndarray =
+    const py::object &py_default_usm_ndarray =
         tensor_kl(py::tuple(), py::arg("dtype") = py::str("u1"),
                   py::arg("buffer") = py_default_usm_memory);

@@ -1032,7 +1033,7 @@ namespace utils
 {

 template <std::size_t num>
-sycl::event keep_args_alive(sycl::queue q,
+sycl::event keep_args_alive(sycl::queue &q,
                             const py::object (&py_objs)[num],
                             const std::vector<sycl::event> &depends = {})
 {
@@ -1043,7 +1044,7 @@ sycl::event keep_args_alive(sycl::queue q,
         shp_arr[i] = std::make_shared<py::handle>(py_objs[i]);
         shp_arr[i]->inc_ref();
     }
-    cgh.host_task([=]() {
+    cgh.host_task([shp_arr = std::move(shp_arr)]() {
         py::gil_scoped_acquire acquire;

         for (std::size_t i = 0; i < num; ++i) {
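
The keep_args_alive change above switches the host task from a default copy capture to an init-capture that moves the array of shared pointers into the closure. A standard-library-only sketch of that idea; run_later and Token are hypothetical stand-ins for cgh.host_task and the py::handle owners:

#include <array>
#include <cstddef>
#include <functional>
#include <iostream>
#include <memory>
#include <string>

using Token = std::shared_ptr<std::string>;

// Stands in for sycl::handler::host_task: runs deferred work later.
void run_later(const std::function<void()> &task) { task(); }

template <std::size_t num>
void keep_alive_sketch(const std::array<Token, num> &args)
{
    std::array<Token, num> shp_arr;
    for (std::size_t i = 0; i < num; ++i) {
        shp_arr[i] = args[i]; // take shared ownership, as the original does
    }

    // A `[=]` capture would copy the whole array into the closure (bumping
    // every refcount a second time); the init-capture moves it instead,
    // since shp_arr is not used again after this point.
    run_later([shp_arr = std::move(shp_arr)]() {
        for (std::size_t i = 0; i < num; ++i) {
            std::cout << *shp_arr[i] << '\n'; // owners are still alive here
        }
    });
}

int main()
{
    std::array<Token, 2> owners{std::make_shared<std::string>("arg0"),
                                std::make_shared<std::string>("arg1")};
    keep_alive_sketch(owners);
    return 0;
}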

dpctl/tensor/libtensor/include/kernels/accumulators.hpp
Lines changed: 13 additions & 12 deletions

@@ -116,19 +116,20 @@ sycl::event inclusive_scan_rec(sycl::queue &exec_q,
 {
     size_t n_groups = ceiling_quotient(n_elems, n_wi * wg_size);

-    sycl::event inc_scan_phase1_ev = exec_q.submit([&](sycl::handler &cgh) {
-        cgh.depends_on(depends);
+    const sycl::event &inc_scan_phase1_ev =
+        exec_q.submit([&](sycl::handler &cgh) {
+            cgh.depends_on(depends);

-        using slmT = sycl::local_accessor<size_t, 1>;
+            using slmT = sycl::local_accessor<size_t, 1>;

-        auto lws = sycl::range<1>(wg_size);
-        auto gws = sycl::range<1>(n_groups * wg_size);
+            auto lws = sycl::range<1>(wg_size);
+            auto gws = sycl::range<1>(n_groups * wg_size);

-        slmT slm_iscan_tmp(lws, cgh);
+            slmT slm_iscan_tmp(lws, cgh);

         cgh.parallel_for<class inclusive_scan_rec_local_scan_krn<
             inputT, outputT, n_wi, IndexerT, decltype(transformer)>>(
-            sycl::nd_range<1>(gws, lws), [=](sycl::nd_item<1> it)
+            sycl::nd_range<1>(gws, lws), [=, slm_iscan_tmp = std::move(slm_iscan_tmp)](sycl::nd_item<1> it)
             {
                 auto chunk_gid = it.get_global_id(0);
                 auto lid = it.get_local_id(0);
@@ -172,7 +173,7 @@ sycl::event inclusive_scan_rec(sycl::queue &exec_q,
                     output[i + m_wi] = local_isum[m_wi];
                 }
             });
-    });
+        });

     sycl::event out_event = inc_scan_phase1_ev;
     if (n_groups > 1) {
@@ -203,11 +204,11 @@ sycl::event inclusive_scan_rec(sycl::queue &exec_q,

         sycl::event e4 = exec_q.submit([&](sycl::handler &cgh) {
             cgh.depends_on(e3);
-            auto ctx = exec_q.get_context();
+            const auto &ctx = exec_q.get_context();
             cgh.host_task([ctx, temp]() { sycl::free(temp, ctx); });
         });

-        out_event = e4;
+        out_event = std::move(e4);
     }

     return out_event;
@@ -235,7 +236,7 @@ size_t accumulate_contig_impl(sycl::queue &q,
     NoOpIndexer flat_indexer{};
     transformerT non_zero_indicator{};

-    sycl::event comp_ev =
+    const sycl::event &comp_ev =
         inclusive_scan_rec<maskT, cumsumT, n_wi, decltype(flat_indexer),
                            decltype(non_zero_indicator)>(
             q, n_elems, wg_size, mask_data_ptr, cumsum_data_ptr, 0, 1,
@@ -321,7 +322,7 @@ size_t accumulate_strided_impl(sycl::queue &q,
     StridedIndexer strided_indexer{nd, 0, shape_strides};
     transformerT non_zero_indicator{};

-    sycl::event comp_ev =
+    const sycl::event &comp_ev =
         inclusive_scan_rec<maskT, cumsumT, n_wi, decltype(strided_indexer),
                            decltype(non_zero_indicator)>(
             q, n_elems, wg_size, mask_data_ptr, cumsum_data_ptr, 0, 1,

dpctl/tensor/libtensor/include/kernels/copy_and_cast.hpp
Lines changed: 2 additions & 2 deletions

@@ -483,12 +483,12 @@ template <typename AccessorT,
 class GenericCopyFromHostFunctor
 {
 private:
-    AccessorT src_acc_;
+    const AccessorT src_acc_;
     dstTy *dst_ = nullptr;
     IndexerT indexer_;

 public:
-    GenericCopyFromHostFunctor(AccessorT src_acc,
+    GenericCopyFromHostFunctor(const AccessorT &src_acc,
                                dstTy *dst_p,
                                IndexerT indexer)
         : src_acc_(src_acc), dst_(dst_p), indexer_(indexer)

dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp
Lines changed: 1 addition & 1 deletion

@@ -862,7 +862,7 @@ sycl::event binary_contig_matrix_contig_row_broadcast_impl(

     sycl::event tmp_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) {
         cgh.depends_on(comp_ev);
-        sycl::context ctx = exec_q.get_context();
+        const sycl::context &ctx = exec_q.get_context();
         cgh.host_task([ctx, padded_vec]() { sycl::free(padded_vec, ctx); });
     });
     host_tasks.push_back(tmp_cleanup_ev);

dpctl/tensor/libtensor/include/kernels/reductions.hpp
Lines changed: 5 additions & 3 deletions

@@ -28,6 +28,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <type_traits>
+#include <utility>
 #include <vector>

 #include "pybind11/pybind11.h"
@@ -760,7 +761,8 @@ sycl::event sum_reduction_over_group_temps_strided_impl(
             partially_reduced_tmp + reduction_groups * iter_nelems;
     }

-    sycl::event first_reduction_ev = exec_q.submit([&](sycl::handler &cgh) {
+    const sycl::event &first_reduction_ev = exec_q.submit([&](sycl::handler
+                                                               &cgh) {
         cgh.depends_on(depends);

         using InputIndexerT = dpctl::tensor::offset_utils::StridedIndexer;
@@ -858,7 +860,7 @@ sycl::event sum_reduction_over_group_temps_strided_impl(

         remaining_reduction_nelems = reduction_groups_;
         std::swap(temp_arg, temp2_arg);
-        dependent_ev = partial_reduction_ev;
+        dependent_ev = std::move(partial_reduction_ev);
     }

     // final reduction to res
@@ -915,7 +917,7 @@ sycl::event sum_reduction_over_group_temps_strided_impl(
     sycl::event cleanup_host_task_event =
         exec_q.submit([&](sycl::handler &cgh) {
             cgh.depends_on(final_reduction_ev);
-            sycl::context ctx = exec_q.get_context();
+            const sycl::context &ctx = exec_q.get_context();

             cgh.host_task([ctx, partially_reduced_tmp] {
                 sycl::free(partially_reduced_tmp, ctx);

dpctl/tensor/libtensor/include/utils/offset_utils.hpp
Lines changed: 1 addition & 1 deletion

@@ -98,7 +98,7 @@ device_allocate_and_pack(sycl::queue q,

     usm_host_allocatorT usm_host_allocator(q);
     shT empty{0, usm_host_allocator};
-    shT packed_shape_strides = detail::concat(empty, vs...);
+    shT packed_shape_strides = detail::concat(std::move(empty), vs...);

     auto packed_shape_strides_owner =
         std::make_shared<shT>(std::move(packed_shape_strides));

dpctl/tensor/libtensor/include/utils/strided_iters.hpp
Lines changed: 11 additions & 9 deletions

@@ -541,7 +541,7 @@ int simplify_iteration_two_strides(const int nd,
 }

 template <typename T, class Error, typename vecT = std::vector<T>>
-std::tuple<vecT, vecT, T> contract_iter(vecT shape, vecT strides)
+std::tuple<vecT, vecT, T> contract_iter(const vecT &shape, const vecT &strides)
 {
     const size_t dim = shape.size();
     if (dim != strides.size()) {
@@ -560,7 +560,7 @@ std::tuple<vecT, vecT, T> contract_iter(vecT shape, vecT strides)

 template <typename T, class Error, typename vecT = std::vector<T>>
 std::tuple<vecT, vecT, T, vecT, T>
-contract_iter2(vecT shape, vecT strides1, vecT strides2)
+contract_iter2(const vecT &shape, const vecT &strides1, const vecT &strides2)
 {
     const size_t dim = shape.size();
     if (dim != strides1.size() || dim != strides2.size()) {
@@ -714,8 +714,10 @@ int simplify_iteration_three_strides(const int nd,
 }

 template <typename T, class Error, typename vecT = std::vector<T>>
-std::tuple<vecT, vecT, T, vecT, T, vecT, T>
-contract_iter3(vecT shape, vecT strides1, vecT strides2, vecT strides3)
+std::tuple<vecT, vecT, T, vecT, T, vecT, T> contract_iter3(const vecT &shape,
+                                                           const vecT &strides1,
+                                                           const vecT &strides2,
+                                                           const vecT &strides3)
 {
     const size_t dim = shape.size();
     if (dim != strides1.size() || dim != strides2.size() ||
@@ -899,11 +901,11 @@ int simplify_iteration_four_strides(const int nd,

 template <typename T, class Error, typename vecT = std::vector<T>>
 std::tuple<vecT, vecT, T, vecT, T, vecT, T, vecT, T>
-contract_iter4(vecT shape,
-               vecT strides1,
-               vecT strides2,
-               vecT strides3,
-               vecT strides4)
+contract_iter4(const vecT &shape,
+               const vecT &strides1,
+               const vecT &strides2,
+               const vecT &strides3,
+               const vecT &strides4)
 {
     const size_t dim = shape.size();
     if (dim != strides1.size() || dim != strides2.size() ||
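
The contract_iter family above now takes its shape and stride vectors by const reference instead of by value, so callers no longer pay for a copy of each container per call. A hedged sketch of the same signature change on a hypothetical helper (simplify is not a dpctl function):

#include <cstddef>
#include <vector>

// Before (each call copies both vectors):
//   long simplify(std::vector<long> shape, std::vector<long> strides);

// After: read-only access to the caller's buffers, no copies.
long simplify(const std::vector<long> &shape, const std::vector<long> &strides)
{
    long span = 0;
    for (std::size_t i = 0; i < shape.size() && i < strides.size(); ++i) {
        span += (shape[i] - 1) * strides[i]; // reads both inputs in place
    }
    return span;
}

int main()
{
    const std::vector<long> shape{2, 3, 4};
    const std::vector<long> strides{12, 4, 1};
    return simplify(shape, strides) == 23 ? 0 : 1;
}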

dpctl/tensor/libtensor/source/boolean_advanced_indexing.cpp
Lines changed: 4 additions & 4 deletions

@@ -264,7 +264,7 @@ py_extract(const dpctl::tensor::usm_ndarray &src,
     sycl::event cleanup_tmp_allocations_ev =
         exec_q.submit([&](sycl::handler &cgh) {
             cgh.depends_on(extract_ev);
-            auto ctx = exec_q.get_context();
+            const auto &ctx = exec_q.get_context();
             cgh.host_task([ctx, packed_src_shape_strides] {
                 sycl::free(packed_src_shape_strides, ctx);
             });
@@ -366,7 +366,7 @@ py_extract(const dpctl::tensor::usm_ndarray &src,
     sycl::event cleanup_tmp_allocations_ev =
         exec_q.submit([&](sycl::handler &cgh) {
             cgh.depends_on(extract_ev);
-            auto ctx = exec_q.get_context();
+            const auto &ctx = exec_q.get_context();
             cgh.host_task([ctx, packed_shapes_strides] {
                 sycl::free(packed_shapes_strides, ctx);
             });
@@ -693,7 +693,7 @@ py_place(const dpctl::tensor::usm_ndarray &dst,
     sycl::event cleanup_tmp_allocations_ev =
         exec_q.submit([&](sycl::handler &cgh) {
             cgh.depends_on(place_ev);
-            auto ctx = exec_q.get_context();
+            const auto &ctx = exec_q.get_context();
             cgh.host_task([ctx, packed_shapes_strides] {
                 sycl::free(packed_shapes_strides, ctx);
             });
@@ -838,7 +838,7 @@ py_nonzero(const dpctl::tensor::usm_ndarray

     sycl::event temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) {
         cgh.depends_on(non_zero_indexes_ev);
-        auto ctx = exec_q.get_context();
+        const auto &ctx = exec_q.get_context();
         cgh.host_task([ctx, src_shape_device_ptr] {
             sycl::free(src_shape_device_ptr, ctx);
         });

dpctl/tensor/libtensor/source/boolean_reductions.hpp
Lines changed: 1 addition & 1 deletion

@@ -292,7 +292,7 @@ py_boolean_reduction(const dpctl::tensor::usm_ndarray &src,

     sycl::event temp_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) {
         cgh.depends_on(red_ev);
-        auto ctx = exec_q.get_context();
+        const auto &ctx = exec_q.get_context();
         cgh.host_task([ctx, packed_shapes_and_strides] {
             sycl::free(packed_shapes_and_strides, ctx);
         });

dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp
Lines changed: 4 additions & 4 deletions

@@ -250,15 +250,15 @@ copy_usm_ndarray_into_usm_ndarray(const dpctl::tensor::usm_ndarray &src,
     if (shape_strides == nullptr) {
         throw std::runtime_error("Unable to allocate device memory");
     }
-    sycl::event copy_shape_ev = std::get<2>(ptr_size_event_tuple);
+    const sycl::event &copy_shape_ev = std::get<2>(ptr_size_event_tuple);

-    sycl::event copy_and_cast_generic_ev = copy_and_cast_fn(
+    const sycl::event &copy_and_cast_generic_ev = copy_and_cast_fn(
         exec_q, src_nelems, nd, shape_strides, src_data, src_offset, dst_data,
         dst_offset, depends, {copy_shape_ev});

     // async free of shape_strides temporary
-    auto ctx = exec_q.get_context();
-    auto temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) {
+    const auto &ctx = exec_q.get_context();
+    const auto &temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) {
         cgh.depends_on(copy_and_cast_generic_ev);
         cgh.host_task(
             [ctx, shape_strides]() { sycl::free(shape_strides, ctx); });

dpctl/tensor/libtensor/source/copy_for_reshape.cpp
Lines changed: 1 addition & 1 deletion

@@ -158,7 +158,7 @@ copy_usm_ndarray_for_reshape(const dpctl::tensor::usm_ndarray &src,

     auto temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) {
         cgh.depends_on(copy_for_reshape_event);
-        auto ctx = exec_q.get_context();
+        const auto &ctx = exec_q.get_context();
         cgh.host_task(
             [shape_strides, ctx]() { sycl::free(shape_strides, ctx); });
     });

dpctl/tensor/libtensor/source/copy_for_roll.cpp
Lines changed: 2 additions & 2 deletions

@@ -239,7 +239,7 @@ copy_usm_ndarray_for_roll_1d(const dpctl::tensor::usm_ndarray &src,

     auto temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) {
         cgh.depends_on(copy_for_roll_event);
-        auto ctx = exec_q.get_context();
+        const auto &ctx = exec_q.get_context();
         cgh.host_task(
             [shape_strides, ctx]() { sycl::free(shape_strides, ctx); });
     });
@@ -379,7 +379,7 @@ copy_usm_ndarray_for_roll_nd(const dpctl::tensor::usm_ndarray &src,

     auto temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) {
         cgh.depends_on(copy_for_roll_event);
-        auto ctx = exec_q.get_context();
+        const auto &ctx = exec_q.get_context();
         cgh.host_task([shape_strides_shifts, ctx]() {
             sycl::free(shape_strides_shifts, ctx);
         });

dpctl/tensor/libtensor/source/copy_numpy_ndarray_into_usm_ndarray.cpp
Lines changed: 1 addition & 1 deletion

@@ -236,7 +236,7 @@ void copy_numpy_ndarray_into_usm_ndarray(
     if (shape_strides == nullptr) {
         throw std::runtime_error("Unable to allocate device memory");
     }
-    sycl::event copy_shape_ev = std::get<2>(ptr_size_event_tuple);
+    const sycl::event &copy_shape_ev = std::get<2>(ptr_size_event_tuple);

     // Get implementation function pointer
     auto copy_and_cast_from_host_blocking_fn =

dpctl/tensor/libtensor/source/elementwise_functions.hpp
Lines changed: 3 additions & 3 deletions

@@ -227,7 +227,7 @@ py_unary_ufunc(const dpctl::tensor::usm_ndarray &src,
         q, host_tasks, simplified_shape, simplified_src_strides,
         simplified_dst_strides);
     py::ssize_t *shape_strides = std::get<0>(ptr_size_event_triple_);
-    sycl::event copy_shape_ev = std::get<2>(ptr_size_event_triple_);
+    const sycl::event &copy_shape_ev = std::get<2>(ptr_size_event_triple_);

     if (shape_strides == nullptr) {
         throw std::runtime_error("Device memory allocation failed");
@@ -533,7 +533,7 @@ std::pair<sycl::event, sycl::event> py_binary_ufunc(
         simplified_src2_strides, simplified_dst_strides);

     py::ssize_t *shape_strides = std::get<0>(ptr_sz_event_triple_);
-    sycl::event copy_shape_ev = std::get<2>(ptr_sz_event_triple_);
+    const sycl::event &copy_shape_ev = std::get<2>(ptr_sz_event_triple_);

     if (shape_strides == nullptr) {
         throw std::runtime_error("Unabled to allocate device memory");
@@ -799,7 +799,7 @@ py_binary_inplace_ufunc(const dpctl::tensor::usm_ndarray &lhs,
         simplified_lhs_strides);

     py::ssize_t *shape_strides = std::get<0>(ptr_sz_event_triple_);
-    sycl::event copy_shape_ev = std::get<2>(ptr_sz_event_triple_);
+    const sycl::event &copy_shape_ev = std::get<2>(ptr_sz_event_triple_);

     if (shape_strides == nullptr) {
         throw std::runtime_error("Unabled to allocate device memory");
