@@ -174,24 +174,115 @@ int main() {
174
174
}
175
175
176
176
if (combinations[i].nsize == 16 ) { // architecture::intel_gpu_pvc
177
- test_get_coord_op<bfloat16, float , /* TM*/ 8 , /* TK*/ 16 , use::a,
178
- layout::row_major, 1 >();
179
177
test_get_coord_op<int8_t , int , /* TM*/ 8 , /* TK*/ 32 , use::a,
180
178
layout::row_major, 1 >();
181
179
test_get_coord_op<bfloat16, float , /* TK*/ 16 , /* TN*/ 16 , use::b,
182
180
layout::ext_intel_packed, 2 >();
183
181
test_get_coord_op<int8_t , int32_t , /* TK*/ 32 , /* TN*/ 16 , use::b,
184
182
layout::ext_intel_packed, 4 >();
185
- test_get_coord_op<float , float , /* TM*/ 8 , /* TN*/ 16 , use::accumulator,
186
- layout::row_major, 1 >();
187
183
test_get_coord_op<int32_t , int32_t , /* TM*/ 8 , /* TN*/ 16 , use::accumulator,
188
184
layout::row_major, 1 >();
189
185
// The combinations below are not currently supported for sub group size = 32 in
190
186
// IGC
191
187
#if (!defined(SG_SZ) || SG_SZ != 32)
188
+ // 8x16x16 float/bfloat16
189
+ std::cout << " 8x16x16 float/bfloat16" << std::endl;
190
+ // A
191
+ test_get_coord_op<bfloat16, float , /* TM*/ 8 , /* TK*/ 16 , use::a,
192
+ layout::row_major, 1 >();
193
+ // B
194
+ test_get_coord_op<bfloat16, float , /* TK*/ 16 , /* TN*/ 16 , use::b,
195
+ layout::ext_intel_packed, 2 >();
192
196
test_get_coord_op<bfloat16, float , /* TK*/ 16 , /* TN*/ 16 , use::b,
193
197
layout::row_major, 1 >();
194
- test_get_coord_op<int8_t , int32_t , /* TK*/ 32 , /* TN*/ 16 , use::b,
198
+ // Accumulator
199
+ test_get_coord_op<bfloat16, float , /* TM*/ 8 , /* TN*/ 16 , use::accumulator,
200
+ layout::row_major, 1 >();
201
+ test_get_coord_op<float , float , /* TM*/ 8 , /* TN*/ 16 , use::accumulator,
202
+ layout::row_major, 1 >();
203
+
204
+
205
+ // 16x16x16 float/bfloat16
206
+ std::cout << " 16x16x16 float/bfloat16" << std::endl;
207
+ // A
208
+ test_get_coord_op<bfloat16, float , /* TM*/ 16 , /* TK*/ 16 , use::a,
209
+ layout::row_major, 1 >();
210
+ // B
211
+ // Duplicate from 8x16x16
212
+ // test_get_coord_op<bfloat16, float, /*TK*/ 16, /*TN*/ 16, use::b,
213
+ // layout::ext_intel_packed, 2>();
214
+ // test_get_coord_op<bfloat16, float, /*TK*/ 16, /*TN*/ 16, use::b,
215
+ // layout::row_major, 1>();
216
+ // Accumulator
217
+ test_get_coord_op<bfloat16, float , /* TM*/ 16 , /* TN*/ 16 , use::accumulator,
218
+ layout::row_major, 1 >();
219
+ test_get_coord_op<float , float , /* TM*/ 16 , /* TN*/ 16 , use::accumulator,
220
+ layout::row_major, 1 >();
221
+
222
+ // 1x64x16 float/bfloat16
223
+ std::cout << " 1x64x16 float/bfloat16" << std::endl;
224
+ // A
225
+ test_get_coord_op<bfloat16, float , /* TM*/ 1 , /* TK*/ 16 , use::a,
226
+ layout::row_major, 1 >();
227
+ // B
228
+ test_get_coord_op<bfloat16, float , /* TK*/ 16 , /* TN*/ 64 , use::b,
229
+ layout::ext_intel_packed, 2 >();
230
+ test_get_coord_op<bfloat16, float , /* TK*/ 16 , /* TN*/ 64 , use::b,
231
+ layout::row_major, 1 >();
232
+ // Accumulator
233
+ test_get_coord_op<bfloat16, float , /* TM*/ 1 , /* TN*/ 64 , use::accumulator,
234
+ layout::row_major, 1 >();
235
+ test_get_coord_op<float , float , /* TM*/ 1 , /* TN*/ 64 , use::accumulator,
236
+ layout::row_major, 1 >();
237
+
238
+ // 1x64x32 float/bfloat16
239
+ std::cout << " 1x64x32 float/bfloat16" << std::endl;
240
+ // A
241
+ test_get_coord_op<bfloat16, float , /* TM*/ 1 , /* TK*/ 32 , use::a,
242
+ layout::row_major, 1 >();
243
+ // B
244
+ test_get_coord_op<bfloat16, float , /* TK*/ 32 , /* TN*/ 64 , use::b,
245
+ layout::ext_intel_packed, 2 >();
246
+ test_get_coord_op<bfloat16, float , /* TK*/ 32 , /* TN*/ 64 , use::b,
247
+ layout::row_major, 1 >();
248
+ // Accumulator
249
+ test_get_coord_op<bfloat16, float , /* TM*/ 1 , /* TN*/ 64 , use::accumulator,
250
+ layout::row_major, 1 >();
251
+ test_get_coord_op<float , float , /* TM*/ 1 , /* TN*/ 64 , use::accumulator,
252
+ layout::row_major, 1 >();
253
+
254
+ // 32x64x16 float/bfloat16
255
+ std::cout << " 32x64x16 float/bfloat16" << std::endl;
256
+ // A
257
+ test_get_coord_op<bfloat16, float , /* TM*/ 32 , /* TK*/ 16 , use::a,
258
+ layout::row_major, 1 >();
259
+ // B
260
+ // Duplicate from 1x64x16
261
+ // test_get_coord_op<bfloat16, float, /*TK*/ 16, /*TN*/ 64, use::b,
262
+ // layout::ext_intel_packed, 2>();
263
+ // test_get_coord_op<bfloat16, float, /*TK*/ 16, /*TN*/ 64, use::b,
264
+ // layout::row_major, 1>();
265
+ // Accumulator
266
+ test_get_coord_op<bfloat16, float , /* TM*/ 32 , /* TN*/ 64 , use::accumulator,
267
+ layout::row_major, 1 >();
268
+ test_get_coord_op<float , float , /* TM*/ 32 , /* TN*/ 64 , use::accumulator,
269
+ layout::row_major, 1 >();
270
+
271
+ // 32x64x32 float/bfloat16
272
+ std::cout << " 32x64x32 float/bfloat16" << std::endl;
273
+ // A
274
+ test_get_coord_op<bfloat16, float , /* TM*/ 32 , /* TK*/ 32 , use::a,
275
+ layout::row_major, 1 >();
276
+ // B
277
+ // Duplicate from 1x64x32
278
+ // test_get_coord_op<bfloat16, float, /*TK*/ 32, /*TN*/ 64, use::b,
279
+ // layout::ext_intel_packed, 2>();
280
+ // test_get_coord_op<bfloat16, float, /*TK*/ 32, /*TN*/ 64, use::b,
281
+ // layout::row_major, 1>();
282
+ // Accumulator
283
+ test_get_coord_op<bfloat16, float , /* TM*/ 32 , /* TN*/ 64 , use::accumulator,
284
+ layout::row_major, 1 >();
285
+ test_get_coord_op<float , float , /* TM*/ 32 , /* TN*/ 64 , use::accumulator,
195
286
layout::row_major, 1 >();
196
287
#endif
197
288
break ;
0 commit comments