@@ -82,9 +82,11 @@ struct UnaryContigFunctor
82
82
sycl::vec<resT, vec_sz> res_vec (const_val);
83
83
#pragma unroll
84
84
for (std::uint8_t it = 0 ; it < n_vecs * vec_sz; it += vec_sz) {
85
+ size_t offset = base + static_cast <size_t >(it) *
86
+ static_cast <size_t >(sgSize);
85
87
auto out_multi_ptr = sycl::address_space_cast<
86
88
sycl::access::address_space::global_space,
87
- sycl::access::decorated::yes>(&out[base + it * sgSize ]);
89
+ sycl::access::decorated::yes>(&out[offset ]);
88
90
89
91
sg.store <vec_sz>(out_multi_ptr, res_vec);
90
92
}
@@ -111,12 +113,14 @@ struct UnaryContigFunctor
111
113
112
114
#pragma unroll
113
115
for (std::uint16_t it = 0 ; it < n_vecs * vec_sz; it += vec_sz) {
116
+ size_t offset = base + static_cast <size_t >(it) *
117
+ static_cast <size_t >(sgSize);
114
118
auto in_multi_ptr = sycl::address_space_cast<
115
119
sycl::access::address_space::global_space,
116
- sycl::access::decorated::yes>(&in[base + it * sgSize ]);
120
+ sycl::access::decorated::yes>(&in[offset ]);
117
121
auto out_multi_ptr = sycl::address_space_cast<
118
122
sycl::access::address_space::global_space,
119
- sycl::access::decorated::yes>(&out[base + it * sgSize ]);
123
+ sycl::access::decorated::yes>(&out[offset ]);
120
124
121
125
x = sg.load <vec_sz>(in_multi_ptr);
122
126
sycl::vec<resT, vec_sz> res_vec = op (x);
@@ -149,12 +153,14 @@ struct UnaryContigFunctor
149
153
150
154
#pragma unroll
151
155
for (std::uint8_t it = 0 ; it < n_vecs * vec_sz; it += vec_sz) {
156
+ size_t offset = base + static_cast <size_t >(it) *
157
+ static_cast <size_t >(sgSize);
152
158
auto in_multi_ptr = sycl::address_space_cast<
153
159
sycl::access::address_space::global_space,
154
- sycl::access::decorated::yes>(&in[base + it * sgSize ]);
160
+ sycl::access::decorated::yes>(&in[offset ]);
155
161
auto out_multi_ptr = sycl::address_space_cast<
156
162
sycl::access::address_space::global_space,
157
- sycl::access::decorated::yes>(&out[base + it * sgSize ]);
163
+ sycl::access::decorated::yes>(&out[offset ]);
158
164
159
165
arg_vec = sg.load <vec_sz>(in_multi_ptr);
160
166
#pragma unroll
@@ -188,12 +194,14 @@ struct UnaryContigFunctor
188
194
189
195
#pragma unroll
190
196
for (std::uint8_t it = 0 ; it < n_vecs * vec_sz; it += vec_sz) {
197
+ size_t offset = base + static_cast <size_t >(it) *
198
+ static_cast <size_t >(sgSize);
191
199
auto in_multi_ptr = sycl::address_space_cast<
192
200
sycl::access::address_space::global_space,
193
- sycl::access::decorated::yes>(&in[base + it * sgSize ]);
201
+ sycl::access::decorated::yes>(&in[offset ]);
194
202
auto out_multi_ptr = sycl::address_space_cast<
195
203
sycl::access::address_space::global_space,
196
- sycl::access::decorated::yes>(&out[base + it * sgSize ]);
204
+ sycl::access::decorated::yes>(&out[offset ]);
197
205
198
206
arg_vec = sg.load <vec_sz>(in_multi_ptr);
199
207
#pragma unroll
@@ -375,15 +383,17 @@ struct BinaryContigFunctor
375
383
376
384
#pragma unroll
377
385
for (std::uint8_t it = 0 ; it < n_vecs * vec_sz; it += vec_sz) {
386
+ size_t offset = base + static_cast <size_t >(it) *
387
+ static_cast <size_t >(sgSize);
378
388
auto in1_multi_ptr = sycl::address_space_cast<
379
389
sycl::access::address_space::global_space,
380
- sycl::access::decorated::yes>(&in1[base + it * sgSize ]);
390
+ sycl::access::decorated::yes>(&in1[offset ]);
381
391
auto in2_multi_ptr = sycl::address_space_cast<
382
392
sycl::access::address_space::global_space,
383
- sycl::access::decorated::yes>(&in2[base + it * sgSize ]);
393
+ sycl::access::decorated::yes>(&in2[offset ]);
384
394
auto out_multi_ptr = sycl::address_space_cast<
385
395
sycl::access::address_space::global_space,
386
- sycl::access::decorated::yes>(&out[base + it * sgSize ]);
396
+ sycl::access::decorated::yes>(&out[offset ]);
387
397
388
398
arg1_vec = sg.load <vec_sz>(in1_multi_ptr);
389
399
arg2_vec = sg.load <vec_sz>(in2_multi_ptr);
@@ -415,15 +425,17 @@ struct BinaryContigFunctor
415
425
416
426
#pragma unroll
417
427
for (std::uint8_t it = 0 ; it < n_vecs * vec_sz; it += vec_sz) {
428
+ size_t offset = base + static_cast <size_t >(it) *
429
+ static_cast <size_t >(sgSize);
418
430
auto in1_multi_ptr = sycl::address_space_cast<
419
431
sycl::access::address_space::global_space,
420
- sycl::access::decorated::yes>(&in1[base + it * sgSize ]);
432
+ sycl::access::decorated::yes>(&in1[offset ]);
421
433
auto in2_multi_ptr = sycl::address_space_cast<
422
434
sycl::access::address_space::global_space,
423
- sycl::access::decorated::yes>(&in2[base + it * sgSize ]);
435
+ sycl::access::decorated::yes>(&in2[offset ]);
424
436
auto out_multi_ptr = sycl::address_space_cast<
425
437
sycl::access::address_space::global_space,
426
- sycl::access::decorated::yes>(&out[base + it * sgSize ]);
438
+ sycl::access::decorated::yes>(&out[offset ]);
427
439
428
440
arg1_vec = sg.load <vec_sz>(in1_multi_ptr);
429
441
arg2_vec = sg.load <vec_sz>(in2_multi_ptr);
0 commit comments