From 11170008c586bbaabf2572f5540aec48853b4631 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Tue, 9 Jan 2024 21:46:23 +0100 Subject: [PATCH] [GlobalIsel] Combine select to integer minmax (second attempt). Instcombine canonicalizes selects to floating point and integer minmax. This combiner and the DAG combiner canonicalize to floating point minmax, but neither of them canonicalizes to integer minmax. On Neoverse V2 basic integer arithmetic and integer minmax have the same costs. --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 3 + .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 84 ++++++ .../AArch64/GlobalISel/arm64-atomic.ll | 32 +- .../AArch64/GlobalISel/arm64-pcsections.ll | 8 +- .../AArch64/GlobalISel/combine-select.mir | 281 ++++++++++++++++++ 5 files changed, 388 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index dcc1a4580b14a..a6e9406bed06a 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -910,6 +910,9 @@ class CombinerHelper { bool tryFoldSelectOfConstants(GSelect *Select, BuildFnTy &MatchInfo); + /// Try to fold (icmp X, Y) ? X : Y -> integer minmax. 
+ bool tryFoldSelectToIntMinMax(GSelect *Select, BuildFnTy &MatchInfo); + bool isOneOrOneSplat(Register Src, bool AllowUndefs); bool isZeroOrZeroSplat(Register Src, bool AllowUndefs); bool isConstantSplatVector(Register Src, int64_t SplatValue, diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 8b15bdb0aca30..fc2793bd7a133 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -6548,6 +6548,87 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select, return false; } +bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select, + BuildFnTy &MatchInfo) { + Register DstReg = Select->getReg(0); + Register Cond = Select->getCondReg(); + Register True = Select->getTrueReg(); + Register False = Select->getFalseReg(); + LLT DstTy = MRI.getType(DstReg); + + // We need a G_ICMP on the condition register. + GICmp *Cmp = getOpcodeDef<GICmp>(Cond, MRI); + if (!Cmp) + return false; + + // We want to fold the icmp and replace the select. + if (!MRI.hasOneNonDBGUse(Cmp->getReg(0))) + return false; + + CmpInst::Predicate Pred = Cmp->getCond(); + // We need a larger or smaller predicate for + // canonicalization. + if (CmpInst::isEquality(Pred)) + return false; + + Register CmpLHS = Cmp->getLHSReg(); + Register CmpRHS = Cmp->getRHSReg(); + + // We can swap CmpLHS and CmpRHS for higher hitrate. + if (True == CmpRHS && False == CmpLHS) { + std::swap(CmpLHS, CmpRHS); + Pred = CmpInst::getSwappedPredicate(Pred); + } + + // (icmp X, Y) ? X : Y -> integer minmax. + // see matchSelectPattern in ValueTracking. + // Legality between G_SELECT and integer minmax can differ. 
+ if (True == CmpLHS && False == CmpRHS) { + switch (Pred) { + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy})) + return false; + MatchInfo = [=](MachineIRBuilder &B) { + B.buildUMax(DstReg, True, False); + }; + return true; + } + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy})) + return false; + MatchInfo = [=](MachineIRBuilder &B) { + B.buildSMax(DstReg, True, False); + }; + return true; + } + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy})) + return false; + MatchInfo = [=](MachineIRBuilder &B) { + B.buildUMin(DstReg, True, False); + }; + return true; + } + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: { + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy})) + return false; + MatchInfo = [=](MachineIRBuilder &B) { + B.buildSMin(DstReg, True, False); + }; + return true; + } + default: + return false; + } + } + + return false; +} + bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) { GSelect *Select = cast<GSelect>(&MI); @@ -6557,5 +6638,8 @@ bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) { if (tryFoldBoolSelectToLogic(Select, MatchInfo)) return true; + if (tryFoldSelectToIntMinMax(Select, MatchInfo)) + return true; + return false; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll index 739332414c198..0e9c126e97a3d 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll @@ -2421,7 +2421,7 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O1-NEXT: ldaxrb w8, [x0] ; CHECK-NOLSE-O1-NEXT: sxtb w9, w8 ; CHECK-NOLSE-O1-NEXT: cmp w9, w1, sxtb -; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, le +; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, lt ; CHECK-NOLSE-O1-NEXT: stxrb 
w10, w9, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB33_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -2435,7 +2435,7 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) { ; CHECK-OUTLINE-O1-NEXT: ldaxrb w8, [x0] ; CHECK-OUTLINE-O1-NEXT: sxtb w9, w8 ; CHECK-OUTLINE-O1-NEXT: cmp w9, w1, sxtb -; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, le +; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, lt ; CHECK-OUTLINE-O1-NEXT: stxrb w10, w9, [x0] ; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB33_1 ; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -2662,7 +2662,7 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; CHECK-NOLSE-O1-NEXT: ldaxrb w8, [x0] ; CHECK-NOLSE-O1-NEXT: and w10, w8, #0xff ; CHECK-NOLSE-O1-NEXT: cmp w10, w9 -; CHECK-NOLSE-O1-NEXT: csel w10, w10, w9, ls +; CHECK-NOLSE-O1-NEXT: csel w10, w10, w9, lo ; CHECK-NOLSE-O1-NEXT: stlxrb w11, w10, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB35_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -2677,7 +2677,7 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; CHECK-OUTLINE-O1-NEXT: ldaxrb w8, [x0] ; CHECK-OUTLINE-O1-NEXT: and w10, w8, #0xff ; CHECK-OUTLINE-O1-NEXT: cmp w10, w9 -; CHECK-OUTLINE-O1-NEXT: csel w10, w10, w9, ls +; CHECK-OUTLINE-O1-NEXT: csel w10, w10, w9, lo ; CHECK-OUTLINE-O1-NEXT: stlxrb w11, w10, [x0] ; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB35_1 ; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -3477,7 +3477,7 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O1-NEXT: ldaxrh w8, [x0] ; CHECK-NOLSE-O1-NEXT: sxth w9, w8 ; CHECK-NOLSE-O1-NEXT: cmp w9, w1, sxth -; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, le +; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, lt ; CHECK-NOLSE-O1-NEXT: stxrh w10, w9, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB43_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -3491,7 +3491,7 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) { ; CHECK-OUTLINE-O1-NEXT: ldaxrh w8, [x0] ; CHECK-OUTLINE-O1-NEXT: sxth w9, w8 ; CHECK-OUTLINE-O1-NEXT: cmp w9, w1, sxth -; 
CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, le +; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, lt ; CHECK-OUTLINE-O1-NEXT: stxrh w10, w9, [x0] ; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB43_1 ; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -3718,7 +3718,7 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { ; CHECK-NOLSE-O1-NEXT: ldaxrh w8, [x0] ; CHECK-NOLSE-O1-NEXT: and w10, w8, #0xffff ; CHECK-NOLSE-O1-NEXT: cmp w10, w9 -; CHECK-NOLSE-O1-NEXT: csel w10, w10, w9, ls +; CHECK-NOLSE-O1-NEXT: csel w10, w10, w9, lo ; CHECK-NOLSE-O1-NEXT: stlxrh w11, w10, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB45_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -3733,7 +3733,7 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { ; CHECK-OUTLINE-O1-NEXT: ldaxrh w8, [x0] ; CHECK-OUTLINE-O1-NEXT: and w10, w8, #0xffff ; CHECK-OUTLINE-O1-NEXT: cmp w10, w9 -; CHECK-OUTLINE-O1-NEXT: csel w10, w10, w9, ls +; CHECK-OUTLINE-O1-NEXT: csel w10, w10, w9, lo ; CHECK-OUTLINE-O1-NEXT: stlxrh w11, w10, [x0] ; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB45_1 ; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -4526,7 +4526,7 @@ define i32 @atomicrmw_min_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0] ; CHECK-NOLSE-O1-NEXT: cmp w8, w1 -; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, le +; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, lt ; CHECK-NOLSE-O1-NEXT: stxr w10, w9, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB53_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -4539,7 +4539,7 @@ define i32 @atomicrmw_min_i32(ptr %ptr, i32 %rhs) { ; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-OUTLINE-O1-NEXT: ldaxr w8, [x0] ; CHECK-OUTLINE-O1-NEXT: cmp w8, w1 -; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, le +; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, lt ; CHECK-OUTLINE-O1-NEXT: stxr w10, w9, [x0] ; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB53_1 ; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -4754,7 +4754,7 @@ define i32 
@atomicrmw_umin_i32(ptr %ptr, i32 %rhs) { ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0] ; CHECK-NOLSE-O1-NEXT: cmp w8, w1 -; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, ls +; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, lo ; CHECK-NOLSE-O1-NEXT: stlxr w10, w9, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB55_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -4767,7 +4767,7 @@ define i32 @atomicrmw_umin_i32(ptr %ptr, i32 %rhs) { ; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-OUTLINE-O1-NEXT: ldaxr w8, [x0] ; CHECK-OUTLINE-O1-NEXT: cmp w8, w1 -; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, ls +; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, lo ; CHECK-OUTLINE-O1-NEXT: stlxr w10, w9, [x0] ; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB55_1 ; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -5547,7 +5547,7 @@ define i64 @atomicrmw_min_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NOLSE-O1-NEXT: ldaxr x8, [x0] ; CHECK-NOLSE-O1-NEXT: cmp x8, x1 -; CHECK-NOLSE-O1-NEXT: csel x9, x8, x1, le +; CHECK-NOLSE-O1-NEXT: csel x9, x8, x1, lt ; CHECK-NOLSE-O1-NEXT: stxr w10, x9, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB63_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -5560,7 +5560,7 @@ define i64 @atomicrmw_min_i64(ptr %ptr, i64 %rhs) { ; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-OUTLINE-O1-NEXT: ldaxr x8, [x0] ; CHECK-OUTLINE-O1-NEXT: cmp x8, x1 -; CHECK-OUTLINE-O1-NEXT: csel x9, x8, x1, le +; CHECK-OUTLINE-O1-NEXT: csel x9, x8, x1, lt ; CHECK-OUTLINE-O1-NEXT: stxr w10, x9, [x0] ; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB63_1 ; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -5775,7 +5775,7 @@ define i64 @atomicrmw_umin_i64(ptr %ptr, i64 %rhs) { ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NOLSE-O1-NEXT: ldaxr x8, [x0] ; CHECK-NOLSE-O1-NEXT: cmp x8, x1 -; CHECK-NOLSE-O1-NEXT: csel x9, x8, x1, ls +; CHECK-NOLSE-O1-NEXT: csel x9, x8, x1, lo ; 
CHECK-NOLSE-O1-NEXT: stlxr w10, x9, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB65_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -5788,7 +5788,7 @@ define i64 @atomicrmw_umin_i64(ptr %ptr, i64 %rhs) { ; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-OUTLINE-O1-NEXT: ldaxr x8, [x0] ; CHECK-OUTLINE-O1-NEXT: cmp x8, x1 -; CHECK-OUTLINE-O1-NEXT: csel x9, x8, x1, ls +; CHECK-OUTLINE-O1-NEXT: csel x9, x8, x1, lo ; CHECK-OUTLINE-O1-NEXT: stlxr w10, x9, [x0] ; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB65_1 ; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll index 4c07081404c88..5a7bd6ee20f9b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll @@ -888,7 +888,7 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 13, implicit killed $nzcv, implicit-def $x9, pcsections !0 + ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def $x9, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -943,7 +943,7 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr) ; CHECK-NEXT: renamable $w10 = ANDWri renamable $w8, 7 ; CHECK-NEXT: 
$wzr = SUBSWrs renamable $w10, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 9, implicit killed $nzcv, implicit-def $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STLXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1148,7 +1148,7 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0 ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 13, implicit killed $nzcv, implicit-def $x9, pcsections !0 + ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def $x9, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} @@ -1203,7 +1203,7 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { ; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr) ; CHECK-NEXT: renamable $w10 = ANDWri renamable $w8, 15 ; CHECK-NEXT: $wzr = SUBSWrs renamable $w10, renamable $w9, 0, implicit-def $nzcv, pcsections !0 - ; CHECK-NEXT: renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 9, implicit killed $nzcv, implicit-def $x10, pcsections !0 + ; CHECK-NEXT: renamable $w10 = 
CSELWr killed renamable $w10, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0 ; CHECK-NEXT: early-clobber renamable $w11 = STLXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr) ; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0 ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir index be2de620fa456..260cb72b0426a 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir @@ -544,3 +544,284 @@ body: | %ext:_(s32) = G_ANYEXT %sel $w0 = COPY %ext(s32) ... +--- +# select test(failed,registers) select icmp_ugt t,f_t_f --> umax(t,f) +name: select_failed_icmp_ugt_t_f_t_f_umax_t_f +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_failed_icmp_ugt_t_f_t_f_umax_t_f + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 + ; CHECK-NEXT: %t:_(s8) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %f:_(s8) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: %y:_(s8) = G_TRUNC [[COPY2]](s64) + ; CHECK-NEXT: %z:_(s8) = G_TRUNC [[COPY3]](s64) + ; CHECK-NEXT: %c:_(s8) = G_ICMP intpred(ugt), %t(s8), %y + ; CHECK-NEXT: %sel:_(s8) = exact G_SELECT %c(s8), %f, %z + ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s8) + ; CHECK-NEXT: $w0 = COPY %ext(s32) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %2:_(s64) = COPY $x2 + %3:_(s64) = COPY $x3 + %4:_(s64) = COPY $x4 + %t:_(s8) = G_TRUNC %0 + %f:_(s8) = G_TRUNC %1 + %y:_(s8) = G_TRUNC %2 + %z:_(s8) = G_TRUNC %3 + %c:_(s8) = G_ICMP intpred(ugt), %t(s8), %y(s8) + %sel:_(s8) = exact G_SELECT %c, %f, %z + %ext:_(s32) = G_ANYEXT %sel + $w0 = COPY %ext(s32) +... 
+--- +# test select icmp_ugt t,f_t_f --> umax(t,f) +name: select_icmp_ugt_t_f_t_f_umax_t_f +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_icmp_ugt_t_f_t_f_umax_t_f + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32) + ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32) + ; CHECK-NEXT: %sel:_(<4 x s32>) = G_UMAX %t, %f + ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %t1:_(s32) = G_TRUNC %0 + %f1:_(s32) = G_TRUNC %1 + %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1 + %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1 + %c:_(<4 x s32>) = G_ICMP intpred(ugt), %t(<4 x s32>), %f(<4 x s32>) + %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f + $q0 = COPY %sel(<4 x s32>) +... 
+--- +# test select icmp_uge t,f_t_f --> umax(t,f) +name: select_icmp_uge_t_f_t_f_umax_t_f +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_icmp_uge_t_f_t_f_umax_t_f + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32) + ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32) + ; CHECK-NEXT: %sel:_(<4 x s32>) = G_UMAX %t, %f + ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %t1:_(s32) = G_TRUNC %0 + %f1:_(s32) = G_TRUNC %1 + %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1 + %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1 + %c:_(<4 x s32>) = G_ICMP intpred(uge), %t(<4 x s32>), %f(<4 x s32>) + %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f + $q0 = COPY %sel(<4 x s32>) +... 
+--- +# test select icmp_sgt t,f_t_f --> smax(t,f) +name: select_icmp_sgt_t_f_t_f_smax_t_f +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_icmp_sgt_t_f_t_f_smax_t_f + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32) + ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32) + ; CHECK-NEXT: %sel:_(<4 x s32>) = G_SMAX %t, %f + ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %t1:_(s32) = G_TRUNC %0 + %f1:_(s32) = G_TRUNC %1 + %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1 + %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1 + %c:_(<4 x s32>) = G_ICMP intpred(sgt), %t(<4 x s32>), %f(<4 x s32>) + %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f + $q0 = COPY %sel(<4 x s32>) +... 
+--- +# test select icmp_sge t,f_t_f --> smax(t,f) +name: select_icmp_sge_t_f_t_f_smax_t_f +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_icmp_sge_t_f_t_f_smax_t_f + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32) + ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32) + ; CHECK-NEXT: %sel:_(<4 x s32>) = G_SMAX %t, %f + ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %t1:_(s32) = G_TRUNC %0 + %f1:_(s32) = G_TRUNC %1 + %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1 + %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1 + %c:_(<4 x s32>) = G_ICMP intpred(sge), %t(<4 x s32>), %f(<4 x s32>) + %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f + $q0 = COPY %sel(<4 x s32>) +... 
+--- +# test select icmp_ult t,f_t_f --> umin(t,f) +name: select_icmp_ult_t_f_t_f_umin_t_f +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_icmp_ult_t_f_t_f_umin_t_f + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32) + ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32) + ; CHECK-NEXT: %sel:_(<4 x s32>) = G_UMIN %t, %f + ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %t1:_(s32) = G_TRUNC %0 + %f1:_(s32) = G_TRUNC %1 + %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1 + %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1 + %c:_(<4 x s32>) = G_ICMP intpred(ult), %t(<4 x s32>), %f(<4 x s32>) + %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f + $q0 = COPY %sel(<4 x s32>) +... 
+--- +# test select icmp_ule t,f_t_f --> umin(t,f) +name: select_icmp_ule_t_f_t_f_umin_t_f +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_icmp_ule_t_f_t_f_umin_t_f + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32) + ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32) + ; CHECK-NEXT: %sel:_(<4 x s32>) = G_UMIN %t, %f + ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %t1:_(s32) = G_TRUNC %0 + %f1:_(s32) = G_TRUNC %1 + %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1 + %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1 + %c:_(<4 x s32>) = G_ICMP intpred(ule), %t(<4 x s32>), %f(<4 x s32>) + %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f + $q0 = COPY %sel(<4 x s32>) +... 
+--- +# test select icmp_slt t,f_t_f --> smin(t,f) +name: select_icmp_slt_t_f_t_f_smin_t_f +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_icmp_slt_t_f_t_f_smin_t_f + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32) + ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32) + ; CHECK-NEXT: %sel:_(<4 x s32>) = G_SMIN %t, %f + ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %t1:_(s32) = G_TRUNC %0 + %f1:_(s32) = G_TRUNC %1 + %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1 + %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1 + %c:_(<4 x s32>) = G_ICMP intpred(slt), %t(<4 x s32>), %f(<4 x s32>) + %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f + $q0 = COPY %sel(<4 x s32>) +... 
+--- +# test select icmp_sle t,f_t_f --> smin(t,f) +name: select_icmp_sle_t_f_t_f_smin_t_f +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: select_icmp_sle_t_f_t_f_smin_t_f + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32) + ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32) + ; CHECK-NEXT: %sel:_(<4 x s32>) = G_SMIN %t, %f + ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %t1:_(s32) = G_TRUNC %0 + %f1:_(s32) = G_TRUNC %1 + %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1 + %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1 + %c:_(<4 x s32>) = G_ICMP intpred(sle), %t(<4 x s32>), %f(<4 x s32>) + %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f + $q0 = COPY %sel(<4 x s32>) +... 
+--- +# multi use test select icmp_sle t,f_t_f --> smin(t,f) failed +name: multi_use_select_icmp_sle_t_f_t_f_smin_t_f_failed +body: | + bb.1: + liveins: $x0, $x1, $x2 + ; CHECK-LABEL: name: multi_use_select_icmp_sle_t_f_t_f_smin_t_f_failed + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32) + ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32) + ; CHECK-NEXT: %c:_(<4 x s32>) = G_ICMP intpred(sle), %t(<4 x s32>), %f + ; CHECK-NEXT: $q1 = COPY %c(<4 x s32>) + ; CHECK-NEXT: %sel:_(<4 x s32>) = exact G_SELECT %c(<4 x s32>), %t, %f + ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>) + %0:_(s64) = COPY $x0 + %1:_(s64) = COPY $x1 + %t1:_(s32) = G_TRUNC %0 + %f1:_(s32) = G_TRUNC %1 + %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1 + %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1 + %c:_(<4 x s32>) = G_ICMP intpred(sle), %t(<4 x s32>), %f(<4 x s32>) + $q1 = COPY %c(<4 x s32>) + %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f + $q0 = COPY %sel(<4 x s32>) +...