Skip to content

Commit 93d48f9

Browse files
committed
Handles XDL_ write VGPR VALU WAW Wait States
1 parent ffa6990 commit 93d48f9

8 files changed: +398 -433 lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 14 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -2290,14 +2290,12 @@ GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) {
22902290
return NumPasses + 2;
22912291
}
22922292

2293-
static int GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses,
2294-
bool IsGFX950) {
2295-
// xdl def cycles | gfx940 | gfx950
2296-
// 2 pass | 5 5
2297-
// 4 pass | 7 8
2298-
// 8 pass | 11 12
2299-
// 16 pass | 19 20
2300-
return NumPasses + 3 + (NumPasses != 2 && IsGFX950);
2293+
static int GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) {
2294+
// 2 pass -> 5
2295+
// 4 pass -> 7
2296+
// 8 pass -> 11
2297+
// 16 pass -> 19
2298+
return NumPasses + 3;
23012299
}
23022300

23032301
int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
@@ -2466,7 +2464,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
24662464
NeedWaitStates =
24672465
isXDL(ST, *MI1)
24682466
? GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(
2469-
NumPasses, ST.hasGFX950Insts())
2467+
NumPasses)
24702468
: GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(
24712469
NumPasses);
24722470
break;
@@ -2608,14 +2606,12 @@ static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses,
26082606
return NumPasses + 3 + (NumPasses != 2 && IsGFX950);
26092607
}
26102608

2611-
static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses,
2612-
bool IsGFX950) {
2613-
// xdl def cycles | gfx940 | gfx950
2614-
// 2 pass | 5 5
2615-
// 4 pass | 7 8
2616-
// 8 pass | 11 12
2617-
// 16 pass | 19 20
2618-
return NumPasses + 3 + (NumPasses != 2 && IsGFX950);
2609+
static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) {
2610+
// 2 pass -> 5
2611+
// 4 pass -> 7
2612+
// 8 pass -> 11
2613+
// 16 pass -> 19
2614+
return NumPasses + 3;
26192615
}
26202616

26212617
static int GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) {
@@ -2766,8 +2762,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
27662762
} else if (ST.hasGFX940Insts()) {
27672763
NeedWaitStates =
27682764
isXDL(ST, *MFMA)
2769-
? GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(
2770-
NumPasses, ST.hasGFX950Insts())
2765+
? GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(NumPasses)
27712766
: GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(
27722767
NumPasses);
27732768
} else {

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16(<8 x bfloat> %arg0, <8 x
4949
; GCN-NEXT: v_mov_b32_e32 v9, s17
5050
; GCN-NEXT: v_mov_b32_e32 v10, s18
5151
; GCN-NEXT: v_mov_b32_e32 v11, s19
52-
; GCN-NEXT: s_nop 4
52+
; GCN-NEXT: s_nop 3
5353
; GCN-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
5454
; GCN-NEXT: s_waitcnt vmcnt(0)
5555
; GCN-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -122,7 +122,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__flags(<8 x bfloat> %arg0
122122
; GCN-NEXT: v_mov_b32_e32 v9, s17
123123
; GCN-NEXT: v_mov_b32_e32 v10, s18
124124
; GCN-NEXT: v_mov_b32_e32 v11, s19
125-
; GCN-NEXT: s_nop 4
125+
; GCN-NEXT: s_nop 3
126126
; GCN-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
127127
; GCN-NEXT: s_waitcnt vmcnt(0)
128128
; GCN-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -179,7 +179,7 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac(<8 x bfloat> %arg0, <8 x b
179179
; GCN-NEXT: s_nop 1
180180
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15]
181181
; GCN-NEXT: s_nop 7
182-
; GCN-NEXT: s_nop 3
182+
; GCN-NEXT: s_nop 2
183183
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
184184
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
185185
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -224,7 +224,7 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac__flags(<8 x bfloat> %arg0,
224224
; GCN-NEXT: s_nop 1
225225
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1
226226
; GCN-NEXT: s_nop 7
227-
; GCN-NEXT: s_nop 3
227+
; GCN-NEXT: s_nop 2
228228
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
229229
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
230230
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -417,7 +417,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac(<8 x bfloat>
417417
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15]
418418
; GCN-NEXT: v_mov_b32_e32 v0, 0
419419
; GCN-NEXT: s_nop 7
420-
; GCN-NEXT: s_nop 2
420+
; GCN-NEXT: s_nop 1
421421
; GCN-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
422422
; GCN-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
423423
; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
@@ -459,7 +459,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac_flags(<8 x bf
459459
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
460460
; GCN-NEXT: v_mov_b32_e32 v0, 0
461461
; GCN-NEXT: s_nop 7
462-
; GCN-NEXT: s_nop 2
462+
; GCN-NEXT: s_nop 1
463463
; GCN-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
464464
; GCN-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
465465
; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16

0 commit comments

Comments
 (0)