Skip to content

Commit ffa6990

Browse files
committed
lit test fixes
1 parent f31d6d2 commit ffa6990

5 files changed: +352 additions, -352 deletions (lines changed)

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16(<8 x bfloat> %arg0, <8 x
4949
; GCN-NEXT: v_mov_b32_e32 v9, s17
5050
; GCN-NEXT: v_mov_b32_e32 v10, s18
5151
; GCN-NEXT: v_mov_b32_e32 v11, s19
52-
; GCN-NEXT: s_nop 3
52+
; GCN-NEXT: s_nop 4
5353
; GCN-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
5454
; GCN-NEXT: s_waitcnt vmcnt(0)
5555
; GCN-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -122,7 +122,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__flags(<8 x bfloat> %arg0
122122
; GCN-NEXT: v_mov_b32_e32 v9, s17
123123
; GCN-NEXT: v_mov_b32_e32 v10, s18
124124
; GCN-NEXT: v_mov_b32_e32 v11, s19
125-
; GCN-NEXT: s_nop 3
125+
; GCN-NEXT: s_nop 4
126126
; GCN-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
127127
; GCN-NEXT: s_waitcnt vmcnt(0)
128128
; GCN-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -179,7 +179,7 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac(<8 x bfloat> %arg0, <8 x b
179179
; GCN-NEXT: s_nop 1
180180
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15]
181181
; GCN-NEXT: s_nop 7
182-
; GCN-NEXT: s_nop 2
182+
; GCN-NEXT: s_nop 3
183183
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
184184
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
185185
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -224,7 +224,7 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac__flags(<8 x bfloat> %arg0,
224224
; GCN-NEXT: s_nop 1
225225
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1
226226
; GCN-NEXT: s_nop 7
227-
; GCN-NEXT: s_nop 2
227+
; GCN-NEXT: s_nop 3
228228
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
229229
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
230230
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -417,7 +417,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac(<8 x bfloat>
417417
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15]
418418
; GCN-NEXT: v_mov_b32_e32 v0, 0
419419
; GCN-NEXT: s_nop 7
420-
; GCN-NEXT: s_nop 1
420+
; GCN-NEXT: s_nop 2
421421
; GCN-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
422422
; GCN-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
423423
; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
@@ -459,7 +459,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac_flags(<8 x bf
459459
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
460460
; GCN-NEXT: v_mov_b32_e32 v0, 0
461461
; GCN-NEXT: s_nop 7
462-
; GCN-NEXT: s_nop 1
462+
; GCN-NEXT: s_nop 2
463463
; GCN-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
464464
; GCN-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
465465
; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll

Lines changed: 36 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ define <4 x float> @test_mfma_f32_16x16x32_f16(<8 x half> %arg0, <8 x half> %arg
1919
; GCN-NEXT: v_accvgpr_write_b32 a3, v11
2020
; GCN-NEXT: s_nop 1
2121
; GCN-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3]
22-
; GCN-NEXT: s_nop 6
22+
; GCN-NEXT: s_nop 7
2323
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
2424
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
2525
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -39,7 +39,7 @@ define <4 x float> @test_mfma_f32_16x16x32_f16__flags(<8 x half> %arg0, <8 x hal
3939
; GCN-NEXT: v_accvgpr_write_b32 a3, v11
4040
; GCN-NEXT: s_nop 1
4141
; GCN-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:1 abid:1 blgp:1
42-
; GCN-NEXT: s_nop 6
42+
; GCN-NEXT: s_nop 7
4343
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
4444
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
4545
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -67,7 +67,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd(ptr addrsp
6767
; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
6868
; SDAG-NEXT: s_nop 1
6969
; SDAG-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3]
70-
; SDAG-NEXT: s_nop 6
70+
; SDAG-NEXT: s_nop 7
7171
; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
7272
; SDAG-NEXT: s_endpgm
7373
;
@@ -88,7 +88,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd(ptr addrsp
8888
; GISEL-NEXT: s_nop 1
8989
; GISEL-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3]
9090
; GISEL-NEXT: v_mov_b32_e32 v0, 0
91-
; GISEL-NEXT: s_nop 5
91+
; GISEL-NEXT: s_nop 6
9292
; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
9393
; GISEL-NEXT: s_endpgm
9494
%result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half> %arg0, <8 x half> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 0)
@@ -114,7 +114,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags(ptr
114114
; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
115115
; SDAG-NEXT: s_nop 1
116116
; SDAG-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1
117-
; SDAG-NEXT: s_nop 6
117+
; SDAG-NEXT: s_nop 7
118118
; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
119119
; SDAG-NEXT: s_endpgm
120120
;
@@ -135,7 +135,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags(ptr
135135
; GISEL-NEXT: s_nop 1
136136
; GISEL-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1
137137
; GISEL-NEXT: v_mov_b32_e32 v0, 0
138-
; GISEL-NEXT: s_nop 5
138+
; GISEL-NEXT: s_nop 6
139139
; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
140140
; GISEL-NEXT: s_endpgm
141141
%result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.f16(<8 x half> %arg0, <8 x half> %arg1, <4 x float> %arg2, i32 3, i32 2, i32 1)
@@ -186,7 +186,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal
186186
; SDAG-NEXT: v_mov_b32_e32 v9, s17
187187
; SDAG-NEXT: v_mov_b32_e32 v10, s18
188188
; SDAG-NEXT: v_mov_b32_e32 v11, s19
189-
; SDAG-NEXT: s_nop 3
189+
; SDAG-NEXT: s_nop 4
190190
; SDAG-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
191191
; SDAG-NEXT: s_waitcnt vmcnt(0)
192192
; SDAG-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -253,7 +253,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16(<8 x half> %arg0, <8 x hal
253253
; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
254254
; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
255255
; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19]
256-
; GISEL-NEXT: s_nop 3
256+
; GISEL-NEXT: s_nop 4
257257
; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1
258258
; GISEL-NEXT: s_waitcnt vmcnt(0)
259259
; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1
@@ -316,7 +316,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, <
316316
; SDAG-NEXT: v_mov_b32_e32 v9, s17
317317
; SDAG-NEXT: v_mov_b32_e32 v10, s18
318318
; SDAG-NEXT: v_mov_b32_e32 v11, s19
319-
; SDAG-NEXT: s_nop 3
319+
; SDAG-NEXT: s_nop 4
320320
; SDAG-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
321321
; SDAG-NEXT: s_waitcnt vmcnt(0)
322322
; SDAG-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -383,7 +383,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__flags(<8 x half> %arg0, <
383383
; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
384384
; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
385385
; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19]
386-
; GISEL-NEXT: s_nop 3
386+
; GISEL-NEXT: s_nop 4
387387
; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1
388388
; GISEL-NEXT: s_waitcnt vmcnt(0)
389389
; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1
@@ -430,7 +430,7 @@ define <16 x float> @test_mfma_f32_32x32x16_f16__mac(<8 x half> %arg0, <8 x half
430430
; GCN-NEXT: s_nop 1
431431
; GCN-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15]
432432
; GCN-NEXT: s_nop 7
433-
; GCN-NEXT: s_nop 2
433+
; GCN-NEXT: s_nop 3
434434
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
435435
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
436436
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -475,7 +475,7 @@ define <16 x float> @test_mfma_f32_32x32x16_f16__mac__flags(<8 x half> %arg0, <8
475475
; GCN-NEXT: s_nop 1
476476
; GCN-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1
477477
; GCN-NEXT: s_nop 7
478-
; GCN-NEXT: s_nop 2
478+
; GCN-NEXT: s_nop 3
479479
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
480480
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
481481
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -776,7 +776,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac(<8 x half> %ar
776776
; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15]
777777
; SDAG-NEXT: v_mov_b32_e32 v0, 0
778778
; SDAG-NEXT: s_nop 7
779-
; SDAG-NEXT: s_nop 1
779+
; SDAG-NEXT: s_nop 2
780780
; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
781781
; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
782782
; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
@@ -813,7 +813,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac(<8 x half> %ar
813813
; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15]
814814
; GISEL-NEXT: v_mov_b32_e32 v0, 0
815815
; GISEL-NEXT: s_nop 7
816-
; GISEL-NEXT: s_nop 1
816+
; GISEL-NEXT: s_nop 2
817817
; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
818818
; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
819819
; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
@@ -855,7 +855,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac_flags(<8 x hal
855855
; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
856856
; SDAG-NEXT: v_mov_b32_e32 v0, 0
857857
; SDAG-NEXT: s_nop 7
858-
; SDAG-NEXT: s_nop 1
858+
; SDAG-NEXT: s_nop 2
859859
; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
860860
; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
861861
; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
@@ -892,7 +892,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac_flags(<8 x hal
892892
; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
893893
; GISEL-NEXT: v_mov_b32_e32 v0, 0
894894
; GISEL-NEXT: s_nop 7
895-
; GISEL-NEXT: s_nop 1
895+
; GISEL-NEXT: s_nop 2
896896
; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
897897
; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
898898
; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
@@ -919,7 +919,7 @@ define <4 x i32> @test_mfma_i32_16x16x64_i8(<4 x i32> %arg0, <4 x i32> %arg1, <4
919919
; GCN-NEXT: v_accvgpr_write_b32 a3, v11
920920
; GCN-NEXT: s_nop 1
921921
; GCN-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3]
922-
; GCN-NEXT: s_nop 6
922+
; GCN-NEXT: s_nop 7
923923
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
924924
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
925925
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -939,7 +939,7 @@ define <4 x i32> @test_mfma_i32_16x16x64_i8__flags(<4 x i32> %arg0, <4 x i32> %a
939939
; GCN-NEXT: v_accvgpr_write_b32 a3, v11
940940
; GCN-NEXT: s_nop 1
941941
; GCN-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:1 abid:1 blgp:1
942-
; GCN-NEXT: s_nop 6
942+
; GCN-NEXT: s_nop 7
943943
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
944944
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
945945
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -971,7 +971,7 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd(ptr addrspa
971971
; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
972972
; SDAG-NEXT: s_nop 1
973973
; SDAG-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3]
974-
; SDAG-NEXT: s_nop 6
974+
; SDAG-NEXT: s_nop 7
975975
; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
976976
; SDAG-NEXT: s_endpgm
977977
;
@@ -992,7 +992,7 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd(ptr addrspa
992992
; GISEL-NEXT: s_nop 1
993993
; GISEL-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3]
994994
; GISEL-NEXT: v_mov_b32_e32 v0, 0
995-
; GISEL-NEXT: s_nop 5
995+
; GISEL-NEXT: s_nop 6
996996
; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
997997
; GISEL-NEXT: s_endpgm
998998
%result = call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x64.i8(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 0, i32 0, i32 0)
@@ -1022,7 +1022,7 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags(ptr
10221022
; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
10231023
; SDAG-NEXT: s_nop 1
10241024
; SDAG-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1
1025-
; SDAG-NEXT: s_nop 6
1025+
; SDAG-NEXT: s_nop 7
10261026
; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
10271027
; SDAG-NEXT: s_endpgm
10281028
;
@@ -1043,7 +1043,7 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags(ptr
10431043
; GISEL-NEXT: s_nop 1
10441044
; GISEL-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1
10451045
; GISEL-NEXT: v_mov_b32_e32 v0, 0
1046-
; GISEL-NEXT: s_nop 5
1046+
; GISEL-NEXT: s_nop 6
10471047
; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
10481048
; GISEL-NEXT: s_endpgm
10491049
%result = call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x64.i8(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 3, i32 2, i32 1)
@@ -1097,7 +1097,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8(<4 x i32> %arg0, <4 x i32>
10971097
; SDAG-NEXT: v_mov_b32_e32 v1, s17
10981098
; SDAG-NEXT: v_mov_b32_e32 v2, s18
10991099
; SDAG-NEXT: v_mov_b32_e32 v3, s19
1100-
; SDAG-NEXT: s_nop 6
1100+
; SDAG-NEXT: s_nop 7
11011101
; SDAG-NEXT: global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1
11021102
; SDAG-NEXT: s_waitcnt vmcnt(0)
11031103
; SDAG-NEXT: global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1
@@ -1169,7 +1169,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8(<4 x i32> %arg0, <4 x i32>
11691169
; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
11701170
; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
11711171
; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19]
1172-
; GISEL-NEXT: s_nop 3
1172+
; GISEL-NEXT: s_nop 4
11731173
; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1
11741174
; GISEL-NEXT: s_waitcnt vmcnt(0)
11751175
; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1
@@ -1233,7 +1233,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__flags(<4 x i32> %arg0, <4
12331233
; SDAG-NEXT: v_mov_b32_e32 v1, s17
12341234
; SDAG-NEXT: v_mov_b32_e32 v2, s18
12351235
; SDAG-NEXT: v_mov_b32_e32 v3, s19
1236-
; SDAG-NEXT: s_nop 6
1236+
; SDAG-NEXT: s_nop 7
12371237
; SDAG-NEXT: global_store_dwordx4 v[8:9], a[28:31], off sc0 sc1
12381238
; SDAG-NEXT: s_waitcnt vmcnt(0)
12391239
; SDAG-NEXT: global_store_dwordx4 v[10:11], a[24:27], off sc0 sc1
@@ -1305,7 +1305,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__flags(<4 x i32> %arg0, <4
13051305
; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
13061306
; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
13071307
; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19]
1308-
; GISEL-NEXT: s_nop 3
1308+
; GISEL-NEXT: s_nop 4
13091309
; GISEL-NEXT: global_store_dwordx4 v[20:21], a[16:19], off sc0 sc1
13101310
; GISEL-NEXT: s_waitcnt vmcnt(0)
13111311
; GISEL-NEXT: global_store_dwordx4 v[22:23], a[20:23], off sc0 sc1
@@ -1352,7 +1352,7 @@ define <16 x i32> @test_mfma_i32_32x32x32_i8__mac(<4 x i32> %arg0, <4 x i32> %ar
13521352
; GCN-NEXT: s_nop 1
13531353
; GCN-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15]
13541354
; GCN-NEXT: s_nop 7
1355-
; GCN-NEXT: s_nop 2
1355+
; GCN-NEXT: s_nop 3
13561356
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
13571357
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
13581358
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -1397,7 +1397,7 @@ define <16 x i32> @test_mfma_i32_32x32x32_i8__mac__flags(<4 x i32> %arg0, <4 x i
13971397
; GCN-NEXT: s_nop 1
13981398
; GCN-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1
13991399
; GCN-NEXT: s_nop 7
1400-
; GCN-NEXT: s_nop 2
1400+
; GCN-NEXT: s_nop 3
14011401
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
14021402
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
14031403
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -1717,7 +1717,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac(<4 x i32> %arg0
17171717
; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15]
17181718
; SDAG-NEXT: v_mov_b32_e32 v0, 0
17191719
; SDAG-NEXT: s_nop 7
1720-
; SDAG-NEXT: s_nop 1
1720+
; SDAG-NEXT: s_nop 2
17211721
; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
17221722
; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
17231723
; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
@@ -1754,7 +1754,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac(<4 x i32> %arg0
17541754
; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15]
17551755
; GISEL-NEXT: v_mov_b32_e32 v0, 0
17561756
; GISEL-NEXT: s_nop 7
1757-
; GISEL-NEXT: s_nop 1
1757+
; GISEL-NEXT: s_nop 2
17581758
; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
17591759
; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
17601760
; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
@@ -1801,7 +1801,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac_flags(<4 x i32>
18011801
; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
18021802
; SDAG-NEXT: v_mov_b32_e32 v0, 0
18031803
; SDAG-NEXT: s_nop 7
1804-
; SDAG-NEXT: s_nop 1
1804+
; SDAG-NEXT: s_nop 2
18051805
; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
18061806
; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
18071807
; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
@@ -1838,7 +1838,7 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac_flags(<4 x i32>
18381838
; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
18391839
; GISEL-NEXT: v_mov_b32_e32 v0, 0
18401840
; GISEL-NEXT: s_nop 7
1841-
; GISEL-NEXT: s_nop 1
1841+
; GISEL-NEXT: s_nop 2
18421842
; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
18431843
; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
18441844
; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
@@ -1865,7 +1865,7 @@ define <4 x float> @test_mfma_f32_16x16x32_bf16(<8 x bfloat> %arg0, <8 x bfloat>
18651865
; GCN-NEXT: v_accvgpr_write_b32 a3, v11
18661866
; GCN-NEXT: s_nop 1
18671867
; GCN-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3]
1868-
; GCN-NEXT: s_nop 6
1868+
; GCN-NEXT: s_nop 7
18691869
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
18701870
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
18711871
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -1885,7 +1885,7 @@ define <4 x float> @test_mfma_f32_16x16x32_bf16__flags(<8 x bfloat> %arg0, <8 x
18851885
; GCN-NEXT: v_accvgpr_write_b32 a3, v11
18861886
; GCN-NEXT: s_nop 1
18871887
; GCN-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:1 abid:1 blgp:1
1888-
; GCN-NEXT: s_nop 6
1888+
; GCN-NEXT: s_nop 7
18891889
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
18901890
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
18911891
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -1913,7 +1913,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd(ptr addrs
19131913
; GCN-NEXT: v_accvgpr_write_b32 a3, s3
19141914
; GCN-NEXT: s_nop 1
19151915
; GCN-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3]
1916-
; GCN-NEXT: s_nop 6
1916+
; GCN-NEXT: s_nop 7
19171917
; GCN-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
19181918
; GCN-NEXT: s_endpgm
19191919
%result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 0)
@@ -1939,7 +1939,7 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags(pt
19391939
; GCN-NEXT: v_accvgpr_write_b32 a3, s3
19401940
; GCN-NEXT: s_nop 1
19411941
; GCN-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1
1942-
; GCN-NEXT: s_nop 6
1942+
; GCN-NEXT: s_nop 7
19431943
; GCN-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
19441944
; GCN-NEXT: s_endpgm
19451945
%result = call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <4 x float> %arg2, i32 3, i32 2, i32 1)

0 commit comments

Comments
 (0)