From 625e962bc4ee99755e26713c4cae84fc1263d353 Mon Sep 17 00:00:00 2001 From: shalininikhil Date: Tue, 4 Feb 2025 17:56:47 +0000 Subject: [PATCH 1/6] [DAG][X86]added shrd in combineor for bzhiq+shlq+or --- llvm/lib/Target/X86/X86ISelLowering.cpp | 29 +++++++++++++++++++ .../X86/shrdq-to-insert-into-bitfield.ll | 18 ++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a956074e50d86..395c0f5504d1b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -51887,6 +51887,35 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, } } + if (N0.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SHL){ + SDValue SHL = (N0.getOpcode() == ISD::SHL) ? N0 : N1; + SDValue OtherOp = (N0.getOpcode() == ISD::SHL) ? N1 : N0; + + if (OtherOp.getOpcode() == ISD::AND) { + SDValue andop = OtherOp; + + if(andop.getOperand(0).getOpcode()==ISD::Constant||andop.getOperand(1).getOpcode()==ISD::Constant){ + + SDValue constOp = andop.getOperand(0).getOpcode()==ISD::Constant ? andop.getOperand(0): andop.getOperand(1); + SDValue valueOp = andop.getOperand(0).getOpcode()==ISD::Constant ? andop.getOperand(1): andop.getOperand(0); + auto *ConstRHS = dyn_cast(constOp); + uint64_t maskValue = ConstRHS->getZExtValue(); + auto *ConstSHL = dyn_cast(SHL.getOperand(1)); + uint64_t shiftValue = ConstSHL->getZExtValue(); + + if((((uint64_t)1< Date: Wed, 5 Feb 2025 12:16:54 +0000 Subject: [PATCH 2/6] using sd_match to match this pattern --- llvm/lib/Target/X86/X86ISelLowering.cpp | 39 ++++++++----------------- 1 file changed, 12 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 395c0f5504d1b..c4624f9e1a5d5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -51887,33 +51887,18 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, } } - if (N0.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SHL){ - SDValue SHL = (N0.getOpcode() == ISD::SHL) ? N0 : N1; - SDValue OtherOp = (N0.getOpcode() == ISD::SHL) ? N1 : N0; - - if (OtherOp.getOpcode() == ISD::AND) { - SDValue andop = OtherOp; - - if(andop.getOperand(0).getOpcode()==ISD::Constant||andop.getOperand(1).getOpcode()==ISD::Constant){ - - SDValue constOp = andop.getOperand(0).getOpcode()==ISD::Constant ? andop.getOperand(0): andop.getOperand(1); - SDValue valueOp = andop.getOperand(0).getOpcode()==ISD::Constant ? andop.getOperand(1): andop.getOperand(0); - auto *ConstRHS = dyn_cast(constOp); - uint64_t maskValue = ConstRHS->getZExtValue(); - auto *ConstSHL = dyn_cast(SHL.getOperand(1)); - uint64_t shiftValue = ConstSHL->getZExtValue(); - - if((((uint64_t)1< Date: Thu, 13 Feb 2025 13:00:37 +0530 Subject: [PATCH 3/6] formated the code with clang-format and Replaced both getConstant with getShiftAmountConstant --- llvm/lib/Target/X86/X86ISelLowering.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c4624f9e1a5d5..c283974dd91de 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -51891,12 +51891,14 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, APInt MaskConst,ShlConst; SDValue A, B; if(sd_match(N,m_Or(m_Shl(m_Value(B),m_ConstInt(ShlConst)),m_And(m_Value(A),m_ConstInt(MaskConst))))){ - uint64_t shiftValue = ShlConst.getZExtValue(); - if(MaskConst.isMask(shiftValue)){ - unsigned numbits = B.getScalarValueSizeInBits(); - unsigned newshift=numbits-shiftValue; - SDValue newSHL = DAG.getNode(ISD::SHL,dl,VT,A,DAG.getConstant(newshift, dl, MVT::i8)); - SDValue R = DAG.getNode(ISD::FSHR,dl,VT,B,newSHL,DAG.getConstant(newshift, dl, MVT::i8)); + uint64_t ShiftValue = ShlConst.getZExtValue(); + if (MaskConst.isMask(ShiftValue)) { + unsigned NumBits = B.getScalarValueSizeInBits(); + unsigned NewShift = NumBits - ShiftValue; + SDValue NewSHL = DAG.getNode( + ISD::SHL, dl, VT, A, DAG.getShiftAmountConstant(NewShift, VT, dl)); + SDValue R = DAG.getNode(ISD::FSHR, dl, VT, B, NewSHL, + DAG.getShiftAmountConstant(NewShift, VT, dl)); return R; } } From d8b8f1212b5342345c678d4c155670bdbda4645b Mon Sep 17 00:00:00 2001 From: shalini-nik Date: Fri, 14 Feb 2025 12:42:11 +0530 Subject: [PATCH 4/6] [X86] adding checks for slow-shld and fixing the clang-format warnings --- llvm/lib/Target/X86/X86ISelLowering.cpp | 31 ++++++++++++++----------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c283974dd91de..6dc7216af450a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -51887,22 +51887,25 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, } } - using namespace llvm::SDPatternMatch; - APInt MaskConst,ShlConst; - SDValue A, B; - if(sd_match(N,m_Or(m_Shl(m_Value(B),m_ConstInt(ShlConst)),m_And(m_Value(A),m_ConstInt(MaskConst))))){ - uint64_t ShiftValue = ShlConst.getZExtValue(); - if (MaskConst.isMask(ShiftValue)) { - unsigned NumBits = B.getScalarValueSizeInBits(); - unsigned NewShift = NumBits - ShiftValue; - SDValue NewSHL = DAG.getNode( - ISD::SHL, dl, VT, A, DAG.getShiftAmountConstant(NewShift, VT, dl)); - SDValue R = DAG.getNode(ISD::FSHR, dl, VT, B, NewSHL, - DAG.getShiftAmountConstant(NewShift, VT, dl)); - return R; + if (!Subtarget.isSHLDSlow()) { + using namespace llvm::SDPatternMatch; + APInt MaskConst, ShlConst; + SDValue A, B; + if (sd_match(N, m_Or(m_Shl(m_Value(B), m_ConstInt(ShlConst)), + m_And(m_Value(A), m_ConstInt(MaskConst))))) { + uint64_t ShiftValue = ShlConst.getZExtValue(); + if (MaskConst.isMask(ShiftValue)) { + unsigned NumBits = B.getScalarValueSizeInBits(); + unsigned NewShift = NumBits - ShiftValue; + SDValue NewSHL = DAG.getNode( + ISD::SHL, dl, VT, A, DAG.getShiftAmountConstant(NewShift, VT, dl)); + SDValue R = DAG.getNode(ISD::FSHR, dl, VT, B, NewSHL, + DAG.getShiftAmountConstant(NewShift, VT, dl)); + return R; + } } } - + if (SDValue SetCC = combineAndOrForCcmpCtest(N, DAG, DCI, Subtarget)) return SetCC; From c4d62a195fa93f228e940a8614634ccf77d85839 Mon Sep 17 00:00:00 2001 From: shalini-nik Date: Fri, 14 Feb 2025 12:43:10 +0530 Subject: [PATCH 5/6] [X86] updating test cases --- llvm/test/CodeGen/X86/insert-bitfield.ll | 25 +++++++++++++++++++ .../X86/shrdq-to-insert-into-bitfield.ll | 18 ------------- 2 files changed, 25 insertions(+), 18 deletions(-) create mode 100644 llvm/test/CodeGen/X86/insert-bitfield.ll delete mode 100644 llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll diff --git a/llvm/test/CodeGen/X86/insert-bitfield.ll b/llvm/test/CodeGen/X86/insert-bitfield.ll new file mode 100644 index 0000000000000..828744604f084 --- /dev/null +++ b/llvm/test/CodeGen/X86/insert-bitfield.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=FAST +; RUN: llc < %s -mtriple=x86_64-- -mattr=+slow-shld | FileCheck %s --check-prefixes=SLOW + +define i64 @updateTop10Bits(i64 %A, i64 %B) { +; FAST-LABEL: updateTop10Bits: +; FAST: # %bb.0: +; FAST-NEXT: movq %rdi, %rax +; FAST-NEXT: shlq $10, %rax +; FAST-NEXT: shrdq $10, %rsi, %rax +; FAST-NEXT: retq +; +; SLOW-LABEL: updateTop10Bits: +; SLOW: # %bb.0: +; SLOW-NEXT: movabsq $18014398509481983, %rax +; SLOW-NEXT: andq %rdi, %rax +; SLOW-NEXT: shlq $54, %rsi +; SLOW-NEXT: orq %rsi, %rax +; SLOW-NEXT: retq +entry: + %and = and i64 %A, 18014398509481983 + %shl = shl i64 %B, 54 + %or = or disjoint i64 %shl, %and + ret i64 %or +} diff --git a/llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll b/llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll deleted file mode 100644 index cc205ee145d88..0000000000000 --- a/llvm/test/CodeGen/X86/shrdq-to-insert-into-bitfield.ll +++ /dev/null @@ -1,18 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O3 < %s | FileCheck %s - -define dso_local i64 @updateTop10Bits(i64 noundef %A, i64 noundef %B) local_unnamed_addr #0 { -; CHECK-LABEL: updateTop10Bits: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: shlq $10, %rax -; CHECK-NEXT: shrdq $10, %rsi, %rax -; CHECK-NEXT: retq -entry: - %and = and i64 %A, 18014398509481983 - %shl = shl i64 %B, 54 - %or = or disjoint i64 %shl, %and - ret i64 %or -} - -attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="skylake" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" } \ No newline at end of file From b731a517cf0002f6ea0efee639769ff1e1c16848 Mon Sep 17 00:00:00 2001 From: shalini-nik Date: Mon, 17 Mar 2025 18:46:39 +0530 Subject: [PATCH 6/6] [x86] added checks to pattern matching --- llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +- llvm/test/CodeGen/X86/insert-bitfield.ll | 97 ++++++++++++++++++++---- 2 files changed, 85 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6dc7216af450a..a206fdf62c6e2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -51887,21 +51887,23 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, } } - if (!Subtarget.isSHLDSlow()) { + if (Subtarget.is64Bit()&&!Subtarget.isSHLDSlow()&&(VT == MVT::i16||VT == MVT::i32||VT == MVT::i64)) { using namespace llvm::SDPatternMatch; APInt MaskConst, ShlConst; SDValue A, B; if (sd_match(N, m_Or(m_Shl(m_Value(B), m_ConstInt(ShlConst)), m_And(m_Value(A), m_ConstInt(MaskConst))))) { uint64_t ShiftValue = ShlConst.getZExtValue(); - if (MaskConst.isMask(ShiftValue)) { + if (MaskConst.isMask(ShiftValue)&& (A.getOpcode()==ISD::CopyFromReg|| A.getOpcode()==ISD::TRUNCATE)&& (B.getOpcode()==ISD::CopyFromReg||B.getOpcode()==ISD::TRUNCATE)) { unsigned NumBits = B.getScalarValueSizeInBits(); unsigned NewShift = NumBits - ShiftValue; + if(ShiftValue>4&&ShiftValue!=8&&ShiftValue!=16&&ShiftValue!=32&&ShiftValue!=64){ SDValue NewSHL = DAG.getNode( ISD::SHL, dl, VT, A, DAG.getShiftAmountConstant(NewShift, VT, dl)); SDValue R = DAG.getNode(ISD::FSHR, dl, VT, B, NewSHL, DAG.getShiftAmountConstant(NewShift, VT, dl)); return R; + } } } } diff --git a/llvm/test/CodeGen/X86/insert-bitfield.ll b/llvm/test/CodeGen/X86/insert-bitfield.ll index 828744604f084..2d8825dab2974 100644 --- a/llvm/test/CodeGen/X86/insert-bitfield.ll +++ b/llvm/test/CodeGen/X86/insert-bitfield.ll @@ -1,25 +1,90 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=FAST -; RUN: llc < %s -mtriple=x86_64-- -mattr=+slow-shld | FileCheck %s --check-prefixes=SLOW +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64,X64-FAST +; RUN: llc < %s -mtriple=x86_64-- -mattr=+slow-shld | FileCheck %s --check-prefixes=X64,X64-SLOW -define i64 @updateTop10Bits(i64 %A, i64 %B) { -; FAST-LABEL: updateTop10Bits: -; FAST: # %bb.0: -; FAST-NEXT: movq %rdi, %rax -; FAST-NEXT: shlq $10, %rax -; FAST-NEXT: shrdq $10, %rsi, %rax -; FAST-NEXT: retq +define i64 @updateTop10Bits_64bits(i64 %A, i64 %B) { +; X64-FAST-LABEL: updateTop10Bits_64bits: +; X64-FAST: # %bb.0: +; X64-FAST-NEXT: movq %rdi, %rax +; X64-FAST-NEXT: shlq $10, %rax +; X64-FAST-NEXT: shrdq $10, %rsi, %rax +; X64-FAST-NEXT: retq ; -; SLOW-LABEL: updateTop10Bits: -; SLOW: # %bb.0: -; SLOW-NEXT: movabsq $18014398509481983, %rax -; SLOW-NEXT: andq %rdi, %rax -; SLOW-NEXT: shlq $54, %rsi -; SLOW-NEXT: orq %rsi, %rax -; SLOW-NEXT: retq +; X64-SLOW-LABEL: updateTop10Bits_64bits: +; X64-SLOW: # %bb.0: +; X64-SLOW-NEXT: movabsq $18014398509481983, %rax +; X64-SLOW-NEXT: andq %rdi, %rax +; X64-SLOW-NEXT: shlq $54, %rsi +; X64-SLOW-NEXT: orq %rsi, %rax +; X64-SLOW-NEXT: retq entry: %and = and i64 %A, 18014398509481983 %shl = shl i64 %B, 54 %or = or disjoint i64 %shl, %and ret i64 %or } + +define i32 @updateTop10Bits_32bits(i32 %A, i32 %B) { +; X64-FAST-LABEL: updateTop10Bits_32bits: +; X64-FAST: # %bb.0: # %entry +; X64-FAST-NEXT: movl %edi, %eax +; X64-FAST-NEXT: shll $10, %eax +; X64-FAST-NEXT: shrdl $10, %esi, %eax +; X64-FAST-NEXT: retq +; +; X64-SLOW-LABEL: updateTop10Bits_32bits: +; X64-SLOW: # %bb.0: # %entry +; X64-SLOW-NEXT: # kill: def $esi killed $esi def $rsi +; X64-SLOW-NEXT: # kill: def $edi killed $edi def $rdi +; X64-SLOW-NEXT: andl $4194303, %edi # imm = 0x3FFFFF +; X64-SLOW-NEXT: shll $22, %esi +; X64-SLOW-NEXT: leal (%rsi,%rdi), %eax +; X64-SLOW-NEXT: retq +entry: + %and = and i32 %A, 4194303 + %shl = shl i32 %B, 22 + %or = or disjoint i32 %shl, %and + ret i32 %or +} + +define i16 @updateTop10Bits_16bits(i16 %A, i16 %B) { +; X64-FAST-LABEL: updateTop10Bits_16bits: +; X64-FAST: # %bb.0: # %entry +; X64-FAST-NEXT: movl %edi, %eax +; X64-FAST-NEXT: shll $10, %eax +; X64-FAST-NEXT: shrdw $10, %si, %ax +; X64-FAST-NEXT: # kill: def $ax killed $ax killed $eax +; X64-FAST-NEXT: retq +; +; X64-SLOW-LABEL: updateTop10Bits_16bits: +; X64-SLOW: # %bb.0: # %entry +; X64-SLOW-NEXT: # kill: def $esi killed $esi def $rsi +; X64-SLOW-NEXT: # kill: def $edi killed $edi def $rdi +; X64-SLOW-NEXT: andl $63, %edi +; X64-SLOW-NEXT: shll $6, %esi +; X64-SLOW-NEXT: leal (%rsi,%rdi), %eax +; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax +; X64-SLOW-NEXT: retq +entry: + %and = and i16 %A, 63 + %shl = shl i16 %B, 6 + %or = or disjoint i16 %shl, %and + ret i16 %or +} + +define i8 @updateTop3Bits_8bits(i8 %A, i8 %B) { +; X64-LABEL: updateTop3Bits_8bits: +; X64: # %bb.0: # %entry +; X64-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NEXT: andb $7, %dil +; X64-NEXT: leal (,%rsi,8), %eax +; X64-NEXT: orb %dil, %al +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: retq + +entry: + %and = and i8 %A, 7 + %shl = shl i8 %B, 3 + %or = or disjoint i8 %shl, %and + ret i8 %or +} \ No newline at end of file