-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[RISCV] Select unsigned bitfield insert for XAndesPerf #142737
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
The XAndesPerf extension includes the unsigned bitfield extraction instruction `NDS.BFOZ`, which can extract the bits from 0 to Len - 1, place them starting at bit Msb, and zero-fill the remaining bits. This patch handles the cases where Msb < Lsb. Instruction Syntax: nds.bfoz Rd, Rs1, Msb, Lsb The operation is: if Msb < Lsb: Lenm1 = Lsb - Msb; Rd[Lsb:Msb] = Rs1[Lenm1:0]; if (Lsb < (XLen - 1)) Rd[XLen-1:Lsb+1] = 0; Rd[Msb-1:0] = 0; When Len == 1, it is a special case where the Msb is set to 0 instead of being equal to the Lsb.
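@llvm/pr-subscribers-backend-risc-v Author: Jim Lin (tclin914) Changes: The XAndesPerf extension includes the unsigned bitfield extraction instruction `NDS.BFOZ`, which can extract the bits from 0 to Len - 1, place them starting at bit Msb, and zero-fill the remaining bits. This patch handles the cases where Msb < Lsb. Instruction Syntax: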
The operation is:
When Len == 1, it is a special case where the Msb is set to 0 instead of being equal to the Lsb. Full diff: https://github.com/llvm/llvm-project/pull/142737.diff 5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 4f6aa41d1e03b..4c4b475ed3898 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -688,6 +688,23 @@ bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node, SDLoc DL,
return true;
}
+bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node, SDLoc DL,
+ MVT VT, SDValue X,
+ unsigned Msb,
+ unsigned Lsb) {
+ // Only supported with XAndesPerf at the moment.
+ if (!Subtarget->hasVendorXAndesPerf())
+ return false;
+
+ unsigned Opc = RISCV::NDS_BFOZ;
+
+ SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
+ CurDAG->getTargetConstant(Msb, DL, VT),
+ CurDAG->getTargetConstant(Lsb, DL, VT));
+ ReplaceNode(Node, Ubi);
+ return true;
+}
+
bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
// Target does not support indexed loads.
if (!Subtarget->hasVendorXTHeadMemIdx())
@@ -1324,6 +1341,23 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
return;
}
+ // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
+ // available.
+ // Transform (and (shl x, c2), c1)
+ // -> (<bfinsert> x, msb, lsb)
+ // e.g.
+ // (and (shl x, 12), 0x00fff000)
+ // If XLen = 32 and C2 = 12, then
+ // Len = 32 - 8 - 12 = 12,
+ // Lsb = 32 - 8 - 1 = 23 and Msb = 12
+ // -> nds.bfoz x, 12, 23
+ const unsigned Len = XLen - Leading - C2;
+ const unsigned Lsb = XLen - Leading - 1;
+ // If Len is 1, the Msb will be 0 instead of C2.
+ unsigned Msb = Len == 1 ? 0 : C2;
+ if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
+ return;
+
// (srli (slli c2+c3), c3)
if (OneUseOrZExtW && !IsCANDI) {
SDNode *SLLI = CurDAG->getMachineNode(
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 11d62e5edad3f..f199c2031b9a9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -79,6 +79,8 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
bool trySignedBitfieldExtract(SDNode *Node);
bool tryUnsignedBitfieldExtract(SDNode *Node, SDLoc DL, MVT VT, SDValue X,
unsigned Msb, unsigned Lsb);
+ bool tryUnsignedBitfieldInsertInZero(SDNode *Node, SDLoc DL, MVT VT,
+ SDValue X, unsigned Msb, unsigned Lsb);
bool tryIndexedLoad(SDNode *Node);
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
diff --git a/llvm/test/CodeGen/RISCV/rv32xandesperf.ll b/llvm/test/CodeGen/RISCV/rv32xandesperf.ll
index 71473ab5dfb58..3996420d477b2 100644
--- a/llvm/test/CodeGen/RISCV/rv32xandesperf.ll
+++ b/llvm/test/CodeGen/RISCV/rv32xandesperf.ll
@@ -2,6 +2,10 @@
; RUN: llc -O0 -mtriple=riscv32 -mattr=+xandesperf -verify-machineinstrs < %s \
; RUN: | FileCheck %s
+; NDS.BFOZ
+
+; MSB >= LSB
+
define i32 @bfoz_from_and_i32(i32 %x) {
; CHECK-LABEL: bfoz_from_and_i32:
; CHECK: # %bb.0:
@@ -70,6 +74,54 @@ define i64 @bfoz_from_lshr_and_i64(i64 %x) {
ret i64 %shifted
}
+; MSB = 0
+
+define i32 @bfoz_from_and_shl_with_msb_zero_i32(i32 %x) {
+; CHECK-LABEL: bfoz_from_and_shl_with_msb_zero_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: nds.bfoz a0, a0, 0, 15
+; CHECK-NEXT: ret
+ %shifted = shl i32 %x, 15
+ %masked = and i32 %shifted, 32768
+ ret i32 %masked
+}
+
+define i32 @bfoz_from_lshr_shl_with_msb_zero_i32(i32 %x) {
+; CHECK-LABEL: bfoz_from_lshr_shl_with_msb_zero_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: nds.bfoz a0, a0, 0, 18
+; CHECK-NEXT: ret
+ %shl = shl i32 %x, 31
+ %lshr = lshr i32 %shl, 13
+ ret i32 %lshr
+}
+
+; MSB < LSB
+
+define i32 @bfoz_from_and_shl_i32(i32 %x) {
+; CHECK-LABEL: bfoz_from_and_shl_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: nds.bfoz a0, a0, 12, 23
+; CHECK-NEXT: ret
+ %shifted = shl i32 %x, 12
+ %masked = and i32 %shifted, 16773120
+ ret i32 %masked
+}
+
+define i32 @bfoz_from_lshr_shl_i32(i32 %x) {
+; CHECK-LABEL: bfoz_from_lshr_shl_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: nds.bfoz a0, a0, 19, 24
+; CHECK-NEXT: ret
+ %shl = shl i32 %x, 26
+ %lshr = lshr i32 %shl, 7
+ ret i32 %lshr
+}
+
+; NDS.BFOS
+
+; MSB >= LSB
+
define i32 @bfos_from_ashr_shl_i32(i32 %x) {
; CHECK-LABEL: bfos_from_ashr_shl_i32:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rv64xandesperf.ll b/llvm/test/CodeGen/RISCV/rv64xandesperf.ll
index 260d30be686dc..af7c300a92d1f 100644
--- a/llvm/test/CodeGen/RISCV/rv64xandesperf.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xandesperf.ll
@@ -2,6 +2,10 @@
; RUN: llc -mtriple=riscv64 -mattr=+xandesperf -verify-machineinstrs < %s \
; RUN: | FileCheck %s
+; NDS.BFOZ
+
+; MSB >= LSB
+
define i32 @bfoz_from_and_i32(i32 %x) {
; CHECK-LABEL: bfoz_from_and_i32:
; CHECK: # %bb.0:
@@ -60,6 +64,94 @@ define i64 @bfoz_from_lshr_and_i64(i64 %x) {
ret i64 %shifted
}
+; MSB = 0
+
+define i32 @bfoz_from_and_shl_with_msb_zero_i32(i32 %x) {
+; CHECK-LABEL: bfoz_from_and_shl_with_msb_zero_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: nds.bfoz a0, a0, 0, 15
+; CHECK-NEXT: ret
+ %shifted = shl i32 %x, 15
+ %masked = and i32 %shifted, 32768
+ ret i32 %masked
+}
+
+define i64 @bfoz_from_and_shl_with_msb_zero_i64(i64 %x) {
+; CHECK-LABEL: bfoz_from_and_shl_with_msb_zero_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: nds.bfoz a0, a0, 0, 48
+; CHECK-NEXT: ret
+ %shifted = shl i64 %x, 48
+ %masked = and i64 %shifted, 281474976710656
+ ret i64 %masked
+}
+
+define i32 @bfoz_from_lshr_shl_with_msb_zero_i32(i32 %x) {
+; CHECK-LABEL: bfoz_from_lshr_shl_with_msb_zero_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: nds.bfoz a0, a0, 0, 18
+; CHECK-NEXT: ret
+ %shl = shl i32 %x, 31
+ %lshr = lshr i32 %shl, 13
+ ret i32 %lshr
+}
+
+define i64 @bfoz_from_lshr_shl_with_msb_zero_i64(i64 %x) {
+; CHECK-LABEL: bfoz_from_lshr_shl_with_msb_zero_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: nds.bfoz a0, a0, 0, 44
+; CHECK-NEXT: ret
+ %shl = shl i64 %x, 63
+ %lshr = lshr i64 %shl, 19
+ ret i64 %lshr
+}
+
+; MSB < LSB
+
+define i32 @bfoz_from_and_shl_i32(i32 %x) {
+; CHECK-LABEL: bfoz_from_and_shl_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: nds.bfoz a0, a0, 12, 23
+; CHECK-NEXT: ret
+ %shifted = shl i32 %x, 12
+ %masked = and i32 %shifted, 16773120
+ ret i32 %masked
+}
+
+define i64 @bfoz_from_and_shl_i64(i64 %x) {
+; CHECK-LABEL: bfoz_from_and_shl_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: nds.bfoz a0, a0, 24, 35
+; CHECK-NEXT: ret
+ %shifted = shl i64 %x, 24
+ %masked = and i64 %shifted, 68702699520
+ ret i64 %masked
+}
+
+define i32 @bfoz_from_lshr_shl_i32(i32 %x) {
+; CHECK-LABEL: bfoz_from_lshr_shl_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: nds.bfoz a0, a0, 19, 24
+; CHECK-NEXT: ret
+ %shl = shl i32 %x, 26
+ %lshr = lshr i32 %shl, 7
+ ret i32 %lshr
+}
+
+define i64 @bfoz_from_lshr_shl_i64(i64 %x) {
+; CHECK-LABEL: bfoz_from_lshr_shl_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: nds.bfoz a0, a0, 25, 48
+; CHECK-NEXT: ret
+ %shl = shl i64 %x, 40
+ %lshr = lshr i64 %shl, 15
+ ret i64 %lshr
+}
+
+; NDS.BFOS
+
+; MSB >= LSB
+
define i32 @bfos_from_ashr_shl_i32(i32 %x) {
; CHECK-LABEL: bfos_from_ashr_shl_i32:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index a4d3b80edbd58..c93dc1f502f23 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -24,8 +24,7 @@ define i64 @slliuw(i64 %a) nounwind {
;
; RV64XANDESPERF-LABEL: slliuw:
; RV64XANDESPERF: # %bb.0:
-; RV64XANDESPERF-NEXT: slli a0, a0, 32
-; RV64XANDESPERF-NEXT: srli a0, a0, 31
+; RV64XANDESPERF-NEXT: nds.bfoz a0, a0, 1, 32
; RV64XANDESPERF-NEXT: ret
%conv1 = shl i64 %a, 1
%shl = and i64 %conv1, 8589934590
|
@@ -688,6 +688,23 @@ bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node, SDLoc DL, | |||
return true; | |||
} | |||
|
|||
bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node, SDLoc DL, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can just inline this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would prefer not. It's good to name it, and it provides a single place for other vendors to add their logic for their instructions here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we keep it, I would like the lsb/msb arguments to truly be least significant bit and most significant bit of the insertion. The encoding tricks used by the Andes instruction should be done inside the function.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agree with @lenary's point and have updated the lsb/msb arguments/variables to accurately represent the least significant bit and most significant bit of the insertion.
const unsigned Len = XLen - Leading - C2; | ||
const unsigned Lsb = XLen - Leading - 1; | ||
// If Len is 1, the Msb will be 0 instead of C2. | ||
unsigned Msb = Len == 1 ? 0 : C2; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this the same as `unsigned Msb = Lsb == C2 ? 0 : C2`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks. It looks more readable.
and most significant bit of the insertion.
// Msb = 32 - 8 - 1 = 23 and Lsb = 12 | ||
const unsigned Msb = XLen - Leading - 1; | ||
// If Msb is equal to C2, the Lsb will be 0 instead of C2. | ||
unsigned Lsb = Msb == C2 ? 0 : C2; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we set `Lsb = C2` here and move the selection of 0 into `tryUnsignedBitfieldInsertInZero`? This behavior may be specific to Andes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. Thanks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/51/builds/17465 Here is the relevant piece of the build log for the reference
|
This patch is similar to #142737. The XAndesPerf extension includes the signed bitfield extraction instruction `NDS.BFOS`, which can extract the bits from 0 to Len - 1, place them starting at bit Lsb, zero-fill the bits from 0 to Lsb - 1, and sign-extend the result. When Lsb == Msb, it is a special case where the Lsb will be set to 0 instead of being equal to the Msb.
The XAndesPerf extension includes the unsigned bitfield extraction instruction `NDS.BFOZ`, which can extract the bits from 0 to Len - 1, place them starting at bit Msb, and zero-fill the remaining bits. This patch handles the cases where Msb < Lsb for `NDS.BFOZ`. Instruction Syntax: nds.bfoz Rd, Rs1, Msb, Lsb The operation is: if Msb < Lsb: Lenm1 = Lsb - Msb; Rd[Lsb:Msb] = Rs1[Lenm1:0]; if (Lsb < (XLen - 1)) Rd[XLen-1:Lsb+1] = 0; Rd[Msb-1:0] = 0; When Len == 1, it is a special case where the Msb is set to 0 instead of being equal to the Lsb.
This patch is similar to llvm#142737. The XAndesPerf extension includes the signed bitfield extraction instruction `NDS.BFOS`, which can extract the bits from 0 to Len - 1, place them starting at bit Lsb, zero-fill the bits from 0 to Lsb - 1, and sign-extend the result. When Lsb == Msb, it is a special case where the Lsb will be set to 0 instead of being equal to the Msb.
The XAndesPerf extension includes the unsigned bitfield extraction instruction `NDS.BFOZ`, which can extract the bits from 0 to Len - 1, place them starting at bit Msb, and zero-fill the remaining bits. This patch handles the cases where Msb < Lsb for `NDS.BFOZ`. Instruction Syntax: nds.bfoz Rd, Rs1, Msb, Lsb The operation is: if Msb < Lsb: Lenm1 = Lsb - Msb; Rd[Lsb:Msb] = Rs1[Lenm1:0]; if (Lsb < (XLen - 1)) Rd[XLen-1:Lsb+1] = 0; Rd[Msb-1:0] = 0; When Len == 1, it is a special case where the Msb is set to 0 instead of being equal to the Lsb.
This patch is similar to llvm#142737. The XAndesPerf extension includes the signed bitfield extraction instruction `NDS.BFOS`, which can extract the bits from 0 to Len - 1, place them starting at bit Lsb, zero-fill the bits from 0 to Lsb - 1, and sign-extend the result. When Lsb == Msb, it is a special case where the Lsb will be set to 0 instead of being equal to the Msb.
This patch is similar to llvm#142737. The XAndesPerf extension includes the signed bitfield extraction instruction `NDS.BFOS`, which can extract the bits from 0 to Len - 1, place them starting at bit Lsb, zero-fill the bits from 0 to Lsb - 1, and sign-extend the result. When Lsb == Msb, it is a special case where the Lsb will be set to 0 instead of being equal to the Msb.
The XAndesPerf extension includes the unsigned bitfield extraction instruction `NDS.BFOZ`, which can extract the bits from 0 to Len - 1, place them starting at bit Msb, and zero-fill the remaining bits.
This patch handles the cases where Msb < Lsb.
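Instruction Syntax:
nds.bfoz Rd, Rs1, Msb, Lsb
The operation is:
if Msb < Lsb: Lenm1 = Lsb - Msb; Rd[Lsb:Msb] = Rs1[Lenm1:0]; if (Lsb < (XLen - 1)) Rd[XLen-1:Lsb+1] = 0; Rd[Msb-1:0] = 0;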
When Len == 1, it is a special case where the Msb is set to 0 instead of
being equal to the Lsb.