Skip to content

Commit 5319c9f

Browse files
committed
add v_sat_pk pattern for fake16, add test cases for GFX12, merge main
1 parent 582c56a commit 5319c9f

File tree

2 files changed: 57 additions (+57) and 49 deletions (-49)

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3318,12 +3318,14 @@ multiclass V_SAT_PK_Pat<Instruction inst> {
33183318

33193319
def: GCNPatIgnoreCopies<
33203320
(i16 (conc_lo_v2i16_i16 (clamp_v2i16_u8 v2i16:$src))),
3321-
(inst VGPR_32:$src)
3321+
(inst VRegSrc_32:$src)
33223322
>;
33233323
}
33243324

3325-
let OtherPredicates = [NotHasTrue16BitInsts] in
3326-
defm : V_SAT_PK_Pat<V_SAT_PK_U8_I16_e64>;
3325+
let OtherPredicates = [NotHasTrue16BitInsts] in {
3326+
defm : V_SAT_PK_Pat<V_SAT_PK_U8_I16_e64>;
3327+
} // End OtherPredicates = [NotHasTrue16BitInsts]
3328+
defm : V_SAT_PK_Pat<V_SAT_PK_U8_I16_fake16_e64>;
33273329

33283330
// With multiple uses of the shift, this will duplicate the shift and
33293331
// increase register pressure.

llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll

Lines changed: 52 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -839,6 +839,19 @@ define i16 @basic_smax_smin_bit_or(i16 %src0, i16 %src1) {
839839
; SDAG-GFX12-NEXT: v_or_b32_e32 v0, v0, v1
840840
; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
841841
;
842+
; SDAG-GFX12-LABEL: basic_smax_smin_bit_or:
843+
; SDAG-GFX12: ; %bb.0:
844+
; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
845+
; SDAG-GFX12-NEXT: s_wait_expcnt 0x0
846+
; SDAG-GFX12-NEXT: s_wait_samplecnt 0x0
847+
; SDAG-GFX12-NEXT: s_wait_bvhcnt 0x0
848+
; SDAG-GFX12-NEXT: s_wait_kmcnt 0x0
849+
; SDAG-GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
850+
; SDAG-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
851+
; SDAG-GFX12-NEXT: v_lshl_or_b32 v0, v1, 16, v0
852+
; SDAG-GFX12-NEXT: v_sat_pk_u8_i16_e32 v0, v0
853+
; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
854+
;
842855
; GISEL-VI-LABEL: basic_smax_smin_bit_or:
843856
; GISEL-VI: ; %bb.0:
844857
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -926,6 +939,20 @@ define i16 @basic_umax_umin_bit_or(i16 %src0, i16 %src1) {
926939
; SDAG-GFX12-NEXT: v_or_b32_e32 v0, v0, v1
927940
; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
928941
;
942+
; SDAG-GFX12-LABEL: basic_umax_umin_bit_or:
943+
; SDAG-GFX12: ; %bb.0:
944+
; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
945+
; SDAG-GFX12-NEXT: s_wait_expcnt 0x0
946+
; SDAG-GFX12-NEXT: s_wait_samplecnt 0x0
947+
; SDAG-GFX12-NEXT: s_wait_bvhcnt 0x0
948+
; SDAG-GFX12-NEXT: s_wait_kmcnt 0x0
949+
; SDAG-GFX12-NEXT: v_min_u16 v1, 0xff, v1
950+
; SDAG-GFX12-NEXT: v_min_u16 v0, 0xff, v0
951+
; SDAG-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
952+
; SDAG-GFX12-NEXT: v_lshlrev_b16 v1, 8, v1
953+
; SDAG-GFX12-NEXT: v_or_b32_e32 v0, v0, v1
954+
; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
955+
;
929956
; GISEL-VI-LABEL: basic_umax_umin_bit_or:
930957
; GISEL-VI: ; %bb.0:
931958
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -992,13 +1019,10 @@ define i16 @basic_smax_smin_vec_cast(i16 %src0, i16 %src1) {
9921019
; SDAG-GFX11-LABEL: basic_smax_smin_vec_cast:
9931020
; SDAG-GFX11: ; %bb.0:
9941021
; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
995-
; SDAG-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
996-
; SDAG-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
997-
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
998-
; SDAG-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
999-
; SDAG-GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
1000-
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1001-
; SDAG-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
1022+
; SDAG-GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
1023+
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1024+
; SDAG-GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1025+
; SDAG-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
10021026
; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
10031027
;
10041028
; SDAG-GFX12-LABEL: basic_smax_smin_vec_cast:
@@ -1041,11 +1065,10 @@ define i16 @basic_smax_smin_vec_cast(i16 %src0, i16 %src1) {
10411065
; GISEL-GFX11-LABEL: basic_smax_smin_vec_cast:
10421066
; GISEL-GFX11: ; %bb.0:
10431067
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1044-
; GISEL-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
1045-
; GISEL-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
1046-
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1047-
; GISEL-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
1048-
; GISEL-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
1068+
; GISEL-GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
1069+
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1070+
; GISEL-GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1071+
; GISEL-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
10491072
; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
10501073
;
10511074
; GISEL-GFX12-LABEL: basic_smax_smin_vec_cast:
@@ -1117,6 +1140,19 @@ define i16 @basic_smax_smin_bit_shl(i16 %src0, i16 %src1) {
11171140
; SDAG-GFX12-NEXT: v_or_b32_e32 v0, v0, v1
11181141
; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
11191142
;
1143+
; SDAG-GFX12-LABEL: basic_smax_smin_bit_shl:
1144+
; SDAG-GFX12: ; %bb.0:
1145+
; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1146+
; SDAG-GFX12-NEXT: s_wait_expcnt 0x0
1147+
; SDAG-GFX12-NEXT: s_wait_samplecnt 0x0
1148+
; SDAG-GFX12-NEXT: s_wait_bvhcnt 0x0
1149+
; SDAG-GFX12-NEXT: s_wait_kmcnt 0x0
1150+
; SDAG-GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
1151+
; SDAG-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1152+
; SDAG-GFX12-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1153+
; SDAG-GFX12-NEXT: v_sat_pk_u8_i16_e32 v0, v0
1154+
; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
1155+
;
11201156
; GISEL-VI-LABEL: basic_smax_smin_bit_shl:
11211157
; GISEL-VI: ; %bb.0:
11221158
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1185,13 +1221,7 @@ define i16 @basic_smax_smin_vec_input(<2 x i16> %src) {
11851221
; SDAG-GFX11-LABEL: basic_smax_smin_vec_input:
11861222
; SDAG-GFX11: ; %bb.0:
11871223
; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1188-
; SDAG-GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
1189-
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1190-
; SDAG-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
1191-
; SDAG-GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1192-
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1193-
; SDAG-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
1194-
; SDAG-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
1224+
; SDAG-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
11951225
; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
11961226
;
11971227
; SDAG-GFX12-LABEL: basic_smax_smin_vec_input:
@@ -1232,16 +1262,7 @@ define i16 @basic_smax_smin_vec_input(<2 x i16> %src) {
12321262
; GISEL-GFX11-LABEL: basic_smax_smin_vec_input:
12331263
; GISEL-GFX11: ; %bb.0:
12341264
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1235-
; GISEL-GFX11-NEXT: v_pk_min_i16 v0, 0xff00ff, v0
1236-
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1237-
; GISEL-GFX11-NEXT: v_pk_max_i16 v0, 0, v0
1238-
; GISEL-GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1239-
; GISEL-GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
1240-
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1241-
; GISEL-GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
1242-
; GISEL-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
1243-
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1244-
; GISEL-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
1265+
; GISEL-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
12451266
; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
12461267
;
12471268
; GISEL-GFX12-LABEL: basic_smax_smin_vec_input:
@@ -1296,13 +1317,7 @@ define i16 @basic_smax_smin_vec_input_rev(<2 x i16> %src) {
12961317
; SDAG-GFX11-LABEL: basic_smax_smin_vec_input_rev:
12971318
; SDAG-GFX11: ; %bb.0:
12981319
; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1299-
; SDAG-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
1300-
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1301-
; SDAG-GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
1302-
; SDAG-GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1303-
; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1304-
; SDAG-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
1305-
; SDAG-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
1320+
; SDAG-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
13061321
; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
13071322
;
13081323
; SDAG-GFX12-LABEL: basic_smax_smin_vec_input_rev:
@@ -1342,16 +1357,7 @@ define i16 @basic_smax_smin_vec_input_rev(<2 x i16> %src) {
13421357
; GISEL-GFX11-LABEL: basic_smax_smin_vec_input_rev:
13431358
; GISEL-GFX11: ; %bb.0:
13441359
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1345-
; GISEL-GFX11-NEXT: v_pk_max_i16 v0, 0, v0
1346-
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1347-
; GISEL-GFX11-NEXT: v_pk_min_i16 v0, 0xff00ff, v0
1348-
; GISEL-GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1349-
; GISEL-GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
1350-
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1351-
; GISEL-GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
1352-
; GISEL-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
1353-
; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1354-
; GISEL-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
1360+
; GISEL-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
13551361
; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
13561362
;
13571363
; GISEL-GFX12-LABEL: basic_smax_smin_vec_input_rev:

0 commit comments

Comments
 (0)