Summary of this patch (text before the first "diff --git" line is ignored by
patch tools):
  * scalarizeExtractedBinOp: the TLI.shouldScalarizeBinop(Vec) bail-out now
    runs before the SETCC result-type check instead of after it.
  * scalarizeExtractedBinOp: after scalarizing a SETCC, the result is sign- or
    zero-extended in-register from i1 when ResVT is not i1 and the vector
    boolean contents are defined and differ from the scalar boolean contents,
    so the scalar value matches what the vector SETCC lane would have held.
  * extract-vector-cmp.ll: CHECK lines updated, and a new test function
    issue_121372 added.

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6805e0cb23ace..9bcae42fc764d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22807,15 +22807,15 @@ static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
       Vec->getNumValues() != 1)
     return SDValue();
 
+  // Targets may want to avoid this to prevent an expensive register transfer.
+  if (!TLI.shouldScalarizeBinop(Vec))
+    return SDValue();
+
   EVT ResVT = ExtElt->getValueType(0);
   if (Opc == ISD::SETCC &&
       (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
     return SDValue();
 
-  // Targets may want to avoid this to prevent an expensive register transfer.
-  if (!TLI.shouldScalarizeBinop(Vec))
-    return SDValue();
-
   // Extracting an element of a vector constant is constant-folded, so this
   // transform is just replacing a vector op with a scalar op while moving the
   // extract.
@@ -22834,8 +22834,21 @@ static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
     EVT OpVT = Op0.getValueType().getVectorElementType();
     Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
     Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
-    return DAG.getSetCC(DL, ResVT, Op0, Op1,
-                        cast<CondCodeSDNode>(Vec->getOperand(2))->get());
+    SDValue NewVal = DAG.getSetCC(
+        DL, ResVT, Op0, Op1, cast<CondCodeSDNode>(Vec->getOperand(2))->get());
+    // We may need to sign- or zero-extend the result to match the same
+    // behaviour as the vector version of SETCC.
+    unsigned VecBoolContents = TLI.getBooleanContents(Vec.getValueType());
+    if (ResVT != MVT::i1 &&
+        VecBoolContents != TargetLowering::UndefinedBooleanContent &&
+        VecBoolContents != TLI.getBooleanContents(ResVT)) {
+      if (VecBoolContents == TargetLowering::ZeroOrNegativeOneBooleanContent)
+        NewVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ResVT, NewVal,
+                             DAG.getValueType(MVT::i1));
+      else
+        NewVal = DAG.getZeroExtendInReg(NewVal, DL, MVT::i1);
+    }
+    return NewVal;
   }
   Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
   Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
index 12bd2db2297d7..8345fdfa46b4c 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
@@ -58,10 +58,11 @@ define i128 @extract_icmp_v1i128(ptr %p) {
 ; CHECK-LABEL: extract_icmp_v1i128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp x9, x8, [x0]
-; CHECK-NEXT:    mov x1, xzr
 ; CHECK-NEXT:    orr x8, x9, x8
 ; CHECK-NEXT:    cmp x8, #0
-; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    sbfx x0, x8, #0, #1
+; CHECK-NEXT:    mov x1, x0
 ; CHECK-NEXT:    ret
   %load = load <1 x i128>, ptr %p, align 16
   %cmp = icmp eq <1 x i128> %load, zeroinitializer
@@ -141,6 +142,26 @@ for.cond.cleanup:
 }
 
 
+; TODO: Combine the sbfx(cset) into a csetm
+define i32 @issue_121372(<4 x i32> %v) {
+; CHECK-LABEL: issue_121372:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    sbfx w8, w8, #0, #1
+; CHECK-NEXT:    cmp w8, #1
+; CHECK-NEXT:    csetm w0, lt
+; CHECK-NEXT:    ret
+  %cmp_ule = icmp ule <4 x i32> %v, zeroinitializer
+  %sext_v4i1 = sext <4 x i1> %cmp_ule to <4 x i32>
+  %cmp_sge = icmp sge <4 x i32> zeroinitializer, %sext_v4i1
+  %ext = extractelement <4 x i1> %cmp_sge, i32 0
+  %res = sext i1 %ext to i32
+  ret i32 %res
+}
+
+
 ; Negative tests
 
 define i1 @extract_icmp_v4i32_splat_rhs(<4 x i32> %a, i32 %b) {
@@ -163,9 +184,9 @@ define i1 @extract_icmp_v4i32_splat_rhs_mul_use(<4 x i32> %a, ptr %p) {
 ; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_mul_use:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #235
-; CHECK-NEXT:    adrp x9, .LCPI7_0
+; CHECK-NEXT:    adrp x9, .LCPI8_0
 ; CHECK-NEXT:    mov x8, x0
-; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI7_0]
+; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI8_0]
 ; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    xtn v1.4h, v0.4s
 ; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b