diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 66c960fe12c68..5f79bc3d8cd02 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -346,11 +346,12 @@ isKnownToBeAPowerOfTwo(Register Val, const MachineRegisterInfo &MRI, /// Returns true if \p Val can be assumed to never be a NaN. If \p SNaN is true, /// this returns if \p Val can be assumed to never be a signaling NaN. LLVM_ABI bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, - bool SNaN = false); + GISelValueTracking *ValueTracking, bool SNaN = false); /// Returns true if \p Val can be assumed to never be a signaling NaN. -inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) { - return isKnownNeverNaN(Val, MRI, true); +inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI, + GISelValueTracking *ValueTracking) { + return isKnownNeverNaN(Val, MRI, ValueTracking, true); } LLVM_ABI Align inferAlignFromPtrInfo(MachineFunction &MF, diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index b1e851183de0d..8952226ae7f1e 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -6519,8 +6519,8 @@ unsigned CombinerHelper::getFPMinMaxOpcForSelect( CombinerHelper::SelectPatternNaNBehaviour CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS, bool IsOrderedComparison) const { - bool LHSSafe = isKnownNeverNaN(LHS, MRI); - bool RHSSafe = isKnownNeverNaN(RHS, MRI); + bool LHSSafe = isKnownNeverNaN(LHS, MRI, VT); + bool RHSSafe = isKnownNeverNaN(RHS, MRI, VT); // Completely unsafe. 
if (!LHSSafe && !RHSSafe) return SelectPatternNaNBehaviour::NOT_APPLICABLE; diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index 1286af864fb3f..c55f543d30d7a 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -937,8 +937,6 @@ void GISelValueTracking::computeKnownFPClass(Register R, if (KnownSrc.isKnownNeverPosInfinity()) Known.knownNot(fcPosInf); - if (KnownSrc.isKnownNever(fcSNan)) - Known.knownNot(fcSNan); // Any negative value besides -0 returns a nan. if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) @@ -961,6 +959,7 @@ void GISelValueTracking::computeKnownFPClass(Register R, } case TargetOpcode::G_FSIN: case TargetOpcode::G_FCOS: + case TargetOpcode::G_FTAN: case TargetOpcode::G_FSINCOS: { // Return NaN on infinite inputs. Register Val = MI.getOperand(1).getReg(); @@ -968,18 +967,19 @@ void GISelValueTracking::computeKnownFPClass(Register R, computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc, Depth + 1); + Known.knownNot(fcInf); if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity()) Known.knownNot(fcNan); break; } + case TargetOpcode::G_FMAXNUM_IEEE: + case TargetOpcode::G_FMINNUM_IEEE: case TargetOpcode::G_FMAXNUM: case TargetOpcode::G_FMINNUM: - case TargetOpcode::G_FMINNUM_IEEE: case TargetOpcode::G_FMAXIMUM: case TargetOpcode::G_FMINIMUM: - case TargetOpcode::G_FMAXNUM_IEEE: case TargetOpcode::G_FMAXIMUMNUM: case TargetOpcode::G_FMINIMUMNUM: { Register LHS = MI.getOperand(1).getReg(); @@ -994,6 +994,10 @@ void GISelValueTracking::computeKnownFPClass(Register R, bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN(); Known = KnownLHS | KnownRHS; + if (Opcode == TargetOpcode::G_FMAXNUM_IEEE || + Opcode == TargetOpcode::G_FMINNUM_IEEE) + Known.knownNot(fcSNan); + // If either operand is not NaN, the result is not NaN. 
if (NeverNaN && (Opcode == TargetOpcode::G_FMINNUM || Opcode == TargetOpcode::G_FMAXNUM || @@ -1001,6 +1005,12 @@ void GISelValueTracking::computeKnownFPClass(Register R, Opcode == TargetOpcode::G_FMAXIMUMNUM)) Known.knownNot(fcNan); + if ((Opcode == TargetOpcode::G_FMAXNUM_IEEE || + Opcode == TargetOpcode::G_FMINNUM_IEEE) && + ((KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNever(fcSNan)) || + (KnownLHS.isKnownNever(fcSNan) && KnownRHS.isKnownNeverNaN()))) + Known.knownNot(fcNan); + if (Opcode == TargetOpcode::G_FMAXNUM || Opcode == TargetOpcode::G_FMAXIMUMNUM || Opcode == TargetOpcode::G_FMAXNUM_IEEE) { @@ -1089,6 +1099,7 @@ void GISelValueTracking::computeKnownFPClass(Register R, case TargetOpcode::G_FCANONICALIZE: { Register Val = MI.getOperand(1).getReg(); KnownFPClass KnownSrc; + computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc, Depth + 1); @@ -1190,6 +1201,8 @@ void GISelValueTracking::computeKnownFPClass(Register R, if (KnownSrc.isKnownNeverNaN()) { Known.knownNot(fcNan); Known.signBitMustBeZero(); + } else { + Known.knownNot(fcSNan); } break; @@ -1300,6 +1313,7 @@ void GISelValueTracking::computeKnownFPClass(Register R, Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); KnownFPClass KnownLHS, KnownRHS; + bool WantNegative = (Opcode == TargetOpcode::G_FADD || Opcode == TargetOpcode::G_STRICT_FADD) && @@ -1364,6 +1378,7 @@ void GISelValueTracking::computeKnownFPClass(Register R, case TargetOpcode::G_STRICT_FMUL: { Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); + // X * X is always non-negative or a NaN. if (LHS == RHS) Known.knownNot(fcNegative); @@ -1494,6 +1509,7 @@ void GISelValueTracking::computeKnownFPClass(Register R, Register Src = MI.getOperand(1).getReg(); // Infinity, nan and zero propagate from source. 
computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth + 1); + Known.knownNot(fcSNan); LLT DstTy = MRI.getType(Dst).getScalarType(); const fltSemantics &DstSem = getFltSemanticForLLT(DstTy); @@ -1517,6 +1533,7 @@ void GISelValueTracking::computeKnownFPClass(Register R, case TargetOpcode::G_FPTRUNC: { computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known, Depth); + Known.knownNot(fcSNan); break; } case TargetOpcode::G_SITOFP: @@ -1698,6 +1715,128 @@ void GISelValueTracking::computeKnownFPClass(Register R, computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1); break; } + case TargetOpcode::G_FATAN: { + Register Val = MI.getOperand(1).getReg(); + KnownFPClass KnownSrc; + + computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc, + Depth + 1); + + // atan(+-inf) -> +-pi/2, which is finite. + if (KnownSrc.isKnownAlways(fcInf)) + Known.KnownFPClasses = fcFinite; + + break; + } + case TargetOpcode::G_FATAN2: { + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + KnownFPClass KnownLHS; + KnownFPClass KnownRHS; + + computeKnownFPClass(LHS, DemandedElts, InterestedClasses, KnownLHS, + Depth + 1); + + computeKnownFPClass(RHS, DemandedElts, InterestedClasses, KnownRHS, + Depth + 1); + + // Bail out unless both operands are known never to be NaN. + if (!KnownLHS.isKnownNeverNaN() || !KnownRHS.isKnownNeverNaN()) + break; + + if (KnownLHS.isKnownAlways(fcZero)) { + // atan2(+-0, -0) -> +-pi + // atan2(+-0, x) -> +-pi for x < 0 + if (KnownRHS.isKnownAlways(fcNegFinite)) { + Known.KnownFPClasses = fcFinite; + break; + } + + // atan2(+-0, +0) -> +-0 + // atan2(+-0, x) -> +-0 for x > 0 + if (KnownRHS.isKnownAlways(fcPosFinite)) { + Known.KnownFPClasses = fcZero; + break; + } + } + + if (KnownRHS.isKnownAlways(fcZero)) { + // atan2(y, +-0) -> -pi/2 for y < 0 + if (KnownLHS.isKnownNeverZero() && KnownLHS.isKnownAlways(fcNegFinite)) { + Known.KnownFPClasses = fcNegFinite; + break; + } + + // atan2(y, +-0) -> +pi/2 for y > 0 + if (KnownLHS.isKnownNeverZero() && 
KnownLHS.isKnownAlways(fcPosFinite)) { + Known.KnownFPClasses = fcPosFinite; + break; + } + } + + if (KnownLHS.isKnownAlways(fcPosFinite) && KnownLHS.isKnownNeverZero()) { + // atan2(+-y, -inf) -> +-pi for finite y > 0 + if (KnownRHS.isKnownAlways(fcNegInf)) { + Known.KnownFPClasses = fcFinite; + break; + } + + // atan2(+-y, +inf) -> +-0 for finite y > 0 + if (KnownRHS.isKnownAlways(fcPosInf)) { + Known.KnownFPClasses = fcZero; + break; + } + } + + if (KnownLHS.isKnownAlways(fcInf)) { + // atan2(+-inf, x) -> +-pi/2 for finite x + // atan2(+-inf, -inf) -> +-3pi/4 + // atan2(+-inf, +inf) -> +-pi/4 + Known.KnownFPClasses = fcFinite; + break; + } + + break; + } + case TargetOpcode::G_FCOSH: { + Register Val = MI.getOperand(1).getReg(); + KnownFPClass KnownSrc; + + computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc, + Depth + 1); + + // cosh(+-inf) -> +inf + if (KnownSrc.isKnownAlways(fcInf)) + Known.KnownFPClasses = fcPosInf; + + break; + } + case TargetOpcode::G_FSINH: { + Register Val = MI.getOperand(1).getReg(); + KnownFPClass KnownSrc; + + computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc, + Depth + 1); + + // sinh(±∞) is ±∞ + if (KnownSrc.isKnownAlways(fcInf)) + Known.KnownFPClasses = fcInf; + + break; + } + case TargetOpcode::G_FTANH: { + Register Val = MI.getOperand(1).getReg(); + KnownFPClass KnownSrc; + + computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc, + Depth + 1); + + // tanh(+-inf) is +-1 + if (KnownSrc.isKnownAlways(fcInf)) + Known.KnownFPClasses = fcFinite; + + break; + } } } diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 028bffd1bf5a7..6ff35c330c234 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -8179,10 +8179,10 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) { // Note this must be done here, and not as an optimization combine in the // absence of a 
dedicate quiet-snan instruction as we're using an // omni-purpose G_FCANONICALIZE. - if (!isKnownNeverSNaN(Src0, MRI)) + if (!isKnownNeverSNaN(Src0, MRI, VT)) Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0); - if (!isKnownNeverSNaN(Src1, MRI)) + if (!isKnownNeverSNaN(Src1, MRI, VT)) Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0); } diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 2584175121d63..95740e78fe8ce 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -12,6 +12,7 @@ #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/FloatingPointMode.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" @@ -807,7 +808,7 @@ llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, } bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, - bool SNaN) { + GISelValueTracking *VT, bool SNaN) { const MachineInstr *DefMI = MRI.getVRegDef(Val); if (!DefMI) return false; @@ -816,78 +817,14 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, if (DefMI->getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) return true; - // If the value is a constant, we can obviously see if it is a NaN or not. 
- if (const ConstantFP *FPVal = getConstantFPVRegVal(Val, MRI)) { - return !FPVal->getValueAPF().isNaN() || - (SNaN && !FPVal->getValueAPF().isSignaling()); - } - - if (DefMI->getOpcode() == TargetOpcode::G_BUILD_VECTOR) { - for (const auto &Op : DefMI->uses()) - if (!isKnownNeverNaN(Op.getReg(), MRI, SNaN)) - return false; - return true; - } + // Guard against a null VT: without value tracking nothing can be proven. + if (!VT) + return false; + KnownFPClass FPClass = VT->computeKnownFPClass(Val, fcNan); + if (SNaN) + return FPClass.isKnownNever(fcSNan); - switch (DefMI->getOpcode()) { - default: - break; - case TargetOpcode::G_FADD: - case TargetOpcode::G_FSUB: - case TargetOpcode::G_FMUL: - case TargetOpcode::G_FDIV: - case TargetOpcode::G_FREM: - case TargetOpcode::G_FSIN: - case TargetOpcode::G_FCOS: - case TargetOpcode::G_FTAN: - case TargetOpcode::G_FACOS: - case TargetOpcode::G_FASIN: - case TargetOpcode::G_FATAN: - case TargetOpcode::G_FATAN2: - case TargetOpcode::G_FCOSH: - case TargetOpcode::G_FSINH: - case TargetOpcode::G_FTANH: - case TargetOpcode::G_FMA: - case TargetOpcode::G_FMAD: - if (SNaN) - return true; - - // TODO: Need isKnownNeverInfinity - return false; - case TargetOpcode::G_FMINNUM_IEEE: - case TargetOpcode::G_FMAXNUM_IEEE: { - if (SNaN) - return true; - // This can return a NaN if either operand is an sNaN, or if both operands - // are NaN. - return (isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI) && - isKnownNeverSNaN(DefMI->getOperand(2).getReg(), MRI)) || - (isKnownNeverSNaN(DefMI->getOperand(1).getReg(), MRI) && - isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI)); - } - case TargetOpcode::G_FMINNUM: - case TargetOpcode::G_FMAXNUM: { - // Only one needs to be known not-nan, since it will be returned if the - // other ends up being one. - return isKnownNeverNaN(DefMI->getOperand(1).getReg(), MRI, SNaN) || - isKnownNeverNaN(DefMI->getOperand(2).getReg(), MRI, SNaN); - } - } - - if (SNaN) { - // FP operations quiet. For now, just handle the ones inserted during - // legalization. 
- switch (DefMI->getOpcode()) { - case TargetOpcode::G_FPEXT: - case TargetOpcode::G_FPTRUNC: - case TargetOpcode::G_FCANONICALIZE: - return true; - default: - return false; - } - } - - return false; + return FPClass.isKnownNeverNaN(); } Align llvm::inferAlignFromPtrInfo(MachineFunction &MF, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 18a948d68e97b..2a6073c20c73b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -859,7 +859,7 @@ class NeverNaNPats frags> : PatFrags { return CurDAG->isKnownNeverNaN(SDValue(N,0)); }]; let GISelPredicateCode = [{ - return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI); + return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT); }]; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp index ee324a5e93f0f..590731d508ce3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp @@ -261,7 +261,7 @@ bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3( // nodes(max/min) have same behavior when one input is NaN and other isn't. // Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN, // also post-legalizer inputs to min/max are fcanonicalized (never SNaN). - if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) { + if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI, VT)) { // Don't fold single use constant that can't be inlined. if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) && (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) { @@ -291,8 +291,8 @@ bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI, // For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates // to 0.0 requires dx10_clamp = true. 
if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) && - isKnownNeverSNaN(Val, MRI)) || - isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) { + isKnownNeverSNaN(Val, MRI, VT)) || + isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT)) { Reg = Val; return true; } @@ -329,6 +329,8 @@ bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI, Register Val = Src0->getOperand(0).getReg(); auto isOp3Zero = [&]() { + if (MI.getNumOperands() < 5) + return false; MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI); if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT) return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0); @@ -338,9 +340,9 @@ bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI, // no NaN inputs. Most often MI is marked with nnan fast math flag. // For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold // when Val could be QNaN. If Val can also be SNaN third input should be 0.0. - if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) || + if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI, VT) || (getIEEE() && getDX10Clamp() && - (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) { + (isKnownNeverSNaN(Val, MRI, VT) || isOp3Zero()))) { Reg = Val; return true; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll index ef88a2be47872..fb33d3dbfc336 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll @@ -74,7 +74,8 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp ; GFX10-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp +; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0 +; GFX10-NEXT: v_med3_f32 v0, v0, 1.0, 0 ; GFX10-NEXT: s_setpc_b64 s[30:31] 
; ; GFX12-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true: @@ -84,7 +85,9 @@ define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp +; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_med3_num_f32 v0, v0, 1.0, 0 ; GFX12-NEXT: s_setpc_b64 s[30:31] %fmul = fmul float %a, 2.0 %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0) @@ -134,7 +137,9 @@ define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) #1 { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp +; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_med3_num_f32 v0, v0, 1.0, 0 ; GFX12-NEXT: s_setpc_b64 s[30:31] %fmul = fmul float %a, 2.0 %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0) @@ -172,7 +177,8 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2 ; GFX10-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp +; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0 +; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true: @@ -182,7 +188,9 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp +; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_med3_num_f32 v0, v0, 0, 1.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] 
%fmul = fmul float %a, 2.0 %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll index ab0de89d3e4e3..2955fb1f24f8b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll @@ -95,7 +95,9 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 { ; GFX10-LABEL: test_min_max_splat_padded_with_undef: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp +; GFX10-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] +; GFX10-NEXT: v_pk_max_f16 v0, v0, 0 +; GFX10-NEXT: v_pk_min_f16 v0, v0, 1.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_min_max_splat_padded_with_undef: @@ -105,7 +107,10 @@ define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp +; GFX12-NEXT: v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_pk_max_num_f16 v0, v0, 0 +; GFX12-NEXT: v_pk_min_num_f16 v0, v0, 1.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] %fmul = fmul <2 x half> %a, %maxnum = call <2 x half> @llvm.maxnum.v2f16(<2 x half> , <2 x half> %fmul) @@ -305,9 +310,7 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 { ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0 -; GFX10-NEXT: v_max_f32_e32 v0, 0, v0 -; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0 +; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false: @@ -330,8 +333,7 @@ define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) # ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0 -; GFX10-NEXT: v_med3_f32 v0, v0, 0, 1.0 +; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false: @@ -355,9 +357,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 { ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0 -; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0 -; GFX10-NEXT: v_max_f32_e32 v0, 0, v0 +; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true: @@ -367,9 +367,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0 +; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX12-NEXT: s_setpc_b64 s[30:31] %fmul = fmul float %a, 2.0 %minnum = call float @llvm.minnum.f32(float %fmul, float 1.0) @@ -381,9 +379,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 { ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_mul_f32_e32 v0, 2.0, v0 -; GFX10-NEXT: v_min_f32_e32 v0, 1.0, v0 -; GFX10-NEXT: v_max_f32_e32 v0, 0, v0 +; GFX10-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false: @@ -393,9 +389,7 @@ 
define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_mul_f32_e32 v0, 2.0, v0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_minmax_num_f32 v0, v0, 1.0, 0 +; GFX12-NEXT: v_mul_f32_e64 v0, v0, 2.0 clamp ; GFX12-NEXT: s_setpc_b64 s[30:31] %fmul = fmul float %a, 2.0 %minnum = call float @llvm.minnum.f32(float %fmul, float 1.0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll index cf0547e112a6b..df8573048d252 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll @@ -456,15 +456,13 @@ define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 { ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0 -; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0 +; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0 -; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0 +; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false: @@ -489,15 +487,13 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 { ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0 -; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0 +; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false: ; GFX8: ; %bb.0: ; GFX8-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0 -; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0 +; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false: @@ -509,7 +505,7 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 { ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_minmax_num_f32 v0, v0, 4.0, 2.0 +; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] %minnum = call float @llvm.minnum.f32(float %a, float 4.0) %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0) @@ -522,16 +518,14 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX10-NEXT: v_min_f32_e32 v0, 4.0, v0 -; GFX10-NEXT: v_max_f32_e32 v0, 2.0, v0 +; GFX10-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_true: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX8-NEXT: v_min_f32_e32 v0, 4.0, v0 -; GFX8-NEXT: v_max_f32_e32 v0, 2.0, v0 +; GFX8-NEXT: v_med3_f32 v0, v0, 2.0, 4.0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true: @@ -543,7 +537,7 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 { ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_minmax_num_f32 v0, v0, 4.0, 2.0 +; GFX12-NEXT: v_med3_num_f32 v0, v0, 2.0, 4.0 ; GFX12-NEXT: s_setpc_b64 s[30:31] %minnum = call float @llvm.minnum.f32(float %a, float 4.0) %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0) diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir index a97d905f2a978..129cbcfca6fa5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-fmed3-const.mir @@ -162,8 +162,12 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) ; ; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true ; GFX12: liveins: $vgpr0 @@ -172,8 +176,12 @@ body: | ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]] - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]] + ; 
GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) %0:vgpr(s32) = COPY $vgpr0 %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 %8:vgpr(s32) = COPY %2(s32) @@ -222,8 +230,12 @@ body: | ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]] - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]] + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) %0:vgpr(s32) = COPY $vgpr0 %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 %8:vgpr(s32) = COPY %2(s32) @@ -307,8 +319,12 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) ; ; GFX12-LABEL: name: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true ; GFX12: liveins: $vgpr0 @@ -317,8 +333,12 @@ body: | ; GFX12-NEXT: [[C:%[0-9]+]]:sgpr(s32) = 
G_FCONSTANT float 2.000000e+00 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; GFX12-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; GFX12-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]] - ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) + ; GFX12-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX12-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; GFX12-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FMUL]], [[COPY2]], [[COPY3]] + ; GFX12-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) %0:vgpr(s32) = COPY $vgpr0 %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 %8:vgpr(s32) = COPY %2(s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir index 70fd67363648d..c3eed50092056 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-clamp-minmax-const.mir @@ -441,13 +441,8 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMUL]], [[COPY2]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], [[COPY3]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) + ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]] + ; CHECK-NEXT: $vgpr0 = 
COPY [[AMDGPU_CLAMP]](s32) %0:vgpr(s32) = COPY $vgpr0 %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 %9:vgpr(s32) = COPY %2(s32) @@ -481,13 +476,9 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[FMUL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FCANONICALIZE]], [[COPY2]], [[COPY3]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) + ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FCANONICALIZE]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) %0:vgpr(s32) = COPY $vgpr0 %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 %10:vgpr(s32) = COPY %2(s32) @@ -522,14 +513,9 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[FMUL]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY2]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY3]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = 
G_AMDGPU_CLAMP [[FCANONICALIZE]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) %0:vgpr(s32) = COPY $vgpr0 %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 %10:vgpr(s32) = COPY %2(s32) @@ -564,13 +550,8 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMUL]], [[COPY2]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) - ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY3]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) + ; CHECK-NEXT: [[AMDGPU_CLAMP:%[0-9]+]]:vgpr(s32) = G_AMDGPU_CLAMP [[FMUL]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CLAMP]](s32) %0:vgpr(s32) = COPY $vgpr0 %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 %9:vgpr(s32) = COPY %2(s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir index 2f41d86100040..a53e97af0d028 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-fmed3-minmax-const.mir @@ -469,11 +469,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[COPY]], [[COPY1]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[FMAXNUM]], 
[[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) %0:vgpr(s32) = COPY $vgpr0 %2:sgpr(s32) = G_FCONSTANT float 2.000000e+00 %7:vgpr(s32) = COPY %2(s32) @@ -502,11 +501,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:vgpr(s32) = G_FMINNUM [[COPY]], [[COPY1]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:vgpr(s32) = G_FMAXNUM [[FMINNUM]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[COPY]], [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) %0:vgpr(s32) = COPY $vgpr0 %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00 %7:vgpr(s32) = COPY %2(s32) @@ -536,11 +534,10 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 4.000000e+00 ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:vgpr(s32) = G_FCANONICALIZE [[COPY]] ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 2.000000e+00 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:vgpr(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; CHECK-NEXT: [[AMDGPU_FMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_FMED3 [[FCANONICALIZE]], [[COPY2]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_FMED3_]](s32) %0:vgpr(s32) = COPY $vgpr0 %2:sgpr(s32) = G_FCONSTANT float 4.000000e+00 %7:vgpr(s32) = 
G_FCANONICALIZE %0