@@ -839,6 +839,19 @@ define i16 @basic_smax_smin_bit_or(i16 %src0, i16 %src1) {
839
839
; SDAG-GFX12-NEXT: v_or_b32_e32 v0, v0, v1
840
840
; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
841
841
;
842
+ ; SDAG-GFX12-LABEL: basic_smax_smin_bit_or:
843
+ ; SDAG-GFX12: ; %bb.0:
844
+ ; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
845
+ ; SDAG-GFX12-NEXT: s_wait_expcnt 0x0
846
+ ; SDAG-GFX12-NEXT: s_wait_samplecnt 0x0
847
+ ; SDAG-GFX12-NEXT: s_wait_bvhcnt 0x0
848
+ ; SDAG-GFX12-NEXT: s_wait_kmcnt 0x0
849
+ ; SDAG-GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
850
+ ; SDAG-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
851
+ ; SDAG-GFX12-NEXT: v_lshl_or_b32 v0, v1, 16, v0
852
+ ; SDAG-GFX12-NEXT: v_sat_pk_u8_i16_e32 v0, v0
853
+ ; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
854
+ ;
842
855
; GISEL-VI-LABEL: basic_smax_smin_bit_or:
843
856
; GISEL-VI: ; %bb.0:
844
857
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -926,6 +939,20 @@ define i16 @basic_umax_umin_bit_or(i16 %src0, i16 %src1) {
926
939
; SDAG-GFX12-NEXT: v_or_b32_e32 v0, v0, v1
927
940
; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
928
941
;
942
+ ; SDAG-GFX12-LABEL: basic_umax_umin_bit_or:
943
+ ; SDAG-GFX12: ; %bb.0:
944
+ ; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
945
+ ; SDAG-GFX12-NEXT: s_wait_expcnt 0x0
946
+ ; SDAG-GFX12-NEXT: s_wait_samplecnt 0x0
947
+ ; SDAG-GFX12-NEXT: s_wait_bvhcnt 0x0
948
+ ; SDAG-GFX12-NEXT: s_wait_kmcnt 0x0
949
+ ; SDAG-GFX12-NEXT: v_min_u16 v1, 0xff, v1
950
+ ; SDAG-GFX12-NEXT: v_min_u16 v0, 0xff, v0
951
+ ; SDAG-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
952
+ ; SDAG-GFX12-NEXT: v_lshlrev_b16 v1, 8, v1
953
+ ; SDAG-GFX12-NEXT: v_or_b32_e32 v0, v0, v1
954
+ ; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
955
+ ;
929
956
; GISEL-VI-LABEL: basic_umax_umin_bit_or:
930
957
; GISEL-VI: ; %bb.0:
931
958
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -992,13 +1019,10 @@ define i16 @basic_smax_smin_vec_cast(i16 %src0, i16 %src1) {
992
1019
; SDAG-GFX11-LABEL: basic_smax_smin_vec_cast:
993
1020
; SDAG-GFX11: ; %bb.0:
994
1021
; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
995
- ; SDAG-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
996
- ; SDAG-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
997
- ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
998
- ; SDAG-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
999
- ; SDAG-GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
1000
- ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1001
- ; SDAG-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
1022
+ ; SDAG-GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
1023
+ ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1024
+ ; SDAG-GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1025
+ ; SDAG-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
1002
1026
; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
1003
1027
;
1004
1028
; SDAG-GFX12-LABEL: basic_smax_smin_vec_cast:
@@ -1041,11 +1065,10 @@ define i16 @basic_smax_smin_vec_cast(i16 %src0, i16 %src1) {
1041
1065
; GISEL-GFX11-LABEL: basic_smax_smin_vec_cast:
1042
1066
; GISEL-GFX11: ; %bb.0:
1043
1067
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1044
- ; GISEL-GFX11-NEXT: v_med3_i16 v1, v1, 0, 0xff
1045
- ; GISEL-GFX11-NEXT: v_med3_i16 v0, v0, 0, 0xff
1046
- ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1047
- ; GISEL-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
1048
- ; GISEL-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
1068
+ ; GISEL-GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
1069
+ ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1070
+ ; GISEL-GFX11-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1071
+ ; GISEL-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
1049
1072
; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
1050
1073
;
1051
1074
; GISEL-GFX12-LABEL: basic_smax_smin_vec_cast:
@@ -1117,6 +1140,19 @@ define i16 @basic_smax_smin_bit_shl(i16 %src0, i16 %src1) {
1117
1140
; SDAG-GFX12-NEXT: v_or_b32_e32 v0, v0, v1
1118
1141
; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
1119
1142
;
1143
+ ; SDAG-GFX12-LABEL: basic_smax_smin_bit_shl:
1144
+ ; SDAG-GFX12: ; %bb.0:
1145
+ ; SDAG-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1146
+ ; SDAG-GFX12-NEXT: s_wait_expcnt 0x0
1147
+ ; SDAG-GFX12-NEXT: s_wait_samplecnt 0x0
1148
+ ; SDAG-GFX12-NEXT: s_wait_bvhcnt 0x0
1149
+ ; SDAG-GFX12-NEXT: s_wait_kmcnt 0x0
1150
+ ; SDAG-GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
1151
+ ; SDAG-GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1152
+ ; SDAG-GFX12-NEXT: v_lshl_or_b32 v0, v1, 16, v0
1153
+ ; SDAG-GFX12-NEXT: v_sat_pk_u8_i16_e32 v0, v0
1154
+ ; SDAG-GFX12-NEXT: s_setpc_b64 s[30:31]
1155
+ ;
1120
1156
; GISEL-VI-LABEL: basic_smax_smin_bit_shl:
1121
1157
; GISEL-VI: ; %bb.0:
1122
1158
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1185,13 +1221,7 @@ define i16 @basic_smax_smin_vec_input(<2 x i16> %src) {
1185
1221
; SDAG-GFX11-LABEL: basic_smax_smin_vec_input:
1186
1222
; SDAG-GFX11: ; %bb.0:
1187
1223
; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1188
- ; SDAG-GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
1189
- ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1190
- ; SDAG-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
1191
- ; SDAG-GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1192
- ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1193
- ; SDAG-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
1194
- ; SDAG-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
1224
+ ; SDAG-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
1195
1225
; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
1196
1226
;
1197
1227
; SDAG-GFX12-LABEL: basic_smax_smin_vec_input:
@@ -1232,16 +1262,7 @@ define i16 @basic_smax_smin_vec_input(<2 x i16> %src) {
1232
1262
; GISEL-GFX11-LABEL: basic_smax_smin_vec_input:
1233
1263
; GISEL-GFX11: ; %bb.0:
1234
1264
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1235
- ; GISEL-GFX11-NEXT: v_pk_min_i16 v0, 0xff00ff, v0
1236
- ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1237
- ; GISEL-GFX11-NEXT: v_pk_max_i16 v0, 0, v0
1238
- ; GISEL-GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1239
- ; GISEL-GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
1240
- ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1241
- ; GISEL-GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
1242
- ; GISEL-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
1243
- ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1244
- ; GISEL-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
1265
+ ; GISEL-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
1245
1266
; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
1246
1267
;
1247
1268
; GISEL-GFX12-LABEL: basic_smax_smin_vec_input:
@@ -1296,13 +1317,7 @@ define i16 @basic_smax_smin_vec_input_rev(<2 x i16> %src) {
1296
1317
; SDAG-GFX11-LABEL: basic_smax_smin_vec_input_rev:
1297
1318
; SDAG-GFX11: ; %bb.0:
1298
1319
; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1299
- ; SDAG-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
1300
- ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1301
- ; SDAG-GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
1302
- ; SDAG-GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1303
- ; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1304
- ; SDAG-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
1305
- ; SDAG-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
1320
+ ; SDAG-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
1306
1321
; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
1307
1322
;
1308
1323
; SDAG-GFX12-LABEL: basic_smax_smin_vec_input_rev:
@@ -1342,16 +1357,7 @@ define i16 @basic_smax_smin_vec_input_rev(<2 x i16> %src) {
1342
1357
; GISEL-GFX11-LABEL: basic_smax_smin_vec_input_rev:
1343
1358
; GISEL-GFX11: ; %bb.0:
1344
1359
; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1345
- ; GISEL-GFX11-NEXT: v_pk_max_i16 v0, 0, v0
1346
- ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1347
- ; GISEL-GFX11-NEXT: v_pk_min_i16 v0, 0xff00ff, v0
1348
- ; GISEL-GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1349
- ; GISEL-GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
1350
- ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1351
- ; GISEL-GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
1352
- ; GISEL-GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
1353
- ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1354
- ; GISEL-GFX11-NEXT: v_or_b32_e32 v0, v0, v1
1360
+ ; GISEL-GFX11-NEXT: v_sat_pk_u8_i16_e32 v0, v0
1355
1361
; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
1356
1362
;
1357
1363
; GISEL-GFX12-LABEL: basic_smax_smin_vec_input_rev:
0 commit comments