-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[AMDGPU][SDAG] Handle ISD::PTRADD in VOP3 patterns #143881
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -514,12 +514,13 @@ let OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = NotHasTrue | |
defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_e64>; | ||
} // End OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = NotHasTrue16BitInsts | ||
|
||
class ThreeOpFragSDAG<SDPatternOperator op1, SDPatternOperator op2> : PatFrag< | ||
class ThreeOpFragSDAG<SDPatternOperator op1, SDPatternOperator op2, bit op1IsRight = 0> : PatFrag< | ||
(ops node:$x, node:$y, node:$z), | ||
// When the inner operation is used multiple times, selecting 3-op | ||
// instructions may still be beneficial -- if the other users can be | ||
// combined similarly. Let's be conservative for now. | ||
(op2 (HasOneUseBinOp<op1> node:$x, node:$y), node:$z), | ||
!if(op1IsRight, (op2 node:$z, (HasOneUseBinOp<op1> node:$x, node:$y)), | ||
(op2 (HasOneUseBinOp<op1> node:$x, node:$y), node:$z)), | ||
[{ | ||
// Only use VALU ops when the result is divergent. | ||
if (!N->isDivergent()) | ||
|
@@ -546,7 +547,10 @@ class ThreeOpFragSDAG<SDPatternOperator op1, SDPatternOperator op2> : PatFrag< | |
let PredicateCodeUsesOperands = 1; | ||
} | ||
|
||
class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : ThreeOpFragSDAG<op1, op2> { | ||
// Matches (op2 (op1 x, y), z) if op1IsRight = 0 and | ||
// matches (op2 z, (op1 x, y)) if op1IsRight = 1. | ||
class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2, | ||
bit op1IsRight = 0> : ThreeOpFragSDAG<op1, op2, op1IsRight> { | ||
// The divergence predicate is irrelevant in GlobalISel, as we have | ||
// proper register bank checks. We just need to verify the constant | ||
// bus restriction when all the sources are considered. | ||
|
@@ -836,12 +840,19 @@ def : GCNPat< | |
(DivergentBinFrag<mul> i32:$src0, IsPow2Plus1:$src1), | ||
(V_LSHL_ADD_U32_e64 i32:$src0, (i32 (Log2_32 imm:$src1)), i32:$src0)>; | ||
|
||
let SubtargetPredicate = HasLshlAddU64Inst in | ||
let SubtargetPredicate = HasLshlAddU64Inst in { | ||
def : GCNPat< | ||
(ThreeOpFrag<shl_0_to_4, add> i64:$src0, i32:$src1, i64:$src2), | ||
(V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2) | ||
>; | ||
|
||
def : GCNPat < | ||
// (ptradd z, (shl x, y)) -> ((x << y) + z) | ||
(ThreeOpFrag<shl_0_to_4, ptradd, /*op1IsRight=*/1> i64:$src0, i32:$src1, i64:$src2), | ||
(V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2) | ||
>; | ||
} // End SubtargetPredicate = HasLshlAddU64Inst | ||
|
||
def : VOPBinOpClampPat<saddsat, V_ADD_I32_e64, i32>; | ||
def : VOPBinOpClampPat<ssubsat, V_SUB_I32_e64, i32>; | ||
|
||
|
@@ -910,19 +921,24 @@ multiclass IMAD32_Pats <VOP3_Pseudo inst> { | |
|
||
// Handle cases where amdgpu-codegenprepare-mul24 made a mul24 instead of a normal mul. | ||
// We need to separate this because otherwise OtherPredicates would be overridden. | ||
class IMAD32_Mul24_Pat<VOP3_Pseudo inst>: GCNPat < | ||
(i64 (add (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2)), | ||
(inst $src0, $src1, $src2, 0 /* clamp */) | ||
>; | ||
class IMAD32_Mul24_Pats_Impl<VOP3_Pseudo inst, SDPatternOperator AddOp, bit mulIsRight = 0> : GCNPat < | ||
!if(mulIsRight, (i64 (AddOp i64:$src2, (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)))), | ||
(i64 (AddOp (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2))), | ||
Comment on lines
+925
to
+926
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should really avoid this, commutable is supposed to be automatic. It may require a special case for ptradd in tablegen itself. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What would be the behavior that we want from tablegen? Should the target be able to specify "PTRADD should be considered commutative in tablegen'erated ISel patterns"? I'd prefer a solution that expresses that ptradds on AMDGPU should be folded into the addressing mode, and if that's not possible, they should be replaced by an ISD::ADD node and the ADD matching rules should be applied. |
||
(inst $src0, $src1, $src2, 0 /* clamp */)>; | ||
|
||
multiclass IMAD32_Mul24_Pats<VOP3_Pseudo inst> { | ||
def : IMAD32_Mul24_Pats_Impl<inst, add>; | ||
def : IMAD32_Mul24_Pats_Impl<inst, ptradd, /*mulIsRight=*/1>; | ||
} | ||
|
||
// exclude pre-GFX9 where it was slow | ||
let OtherPredicates = [HasNotMADIntraFwdBug], SubtargetPredicate = isGFX9Plus in { | ||
defm : IMAD32_Pats<V_MAD_U64_U32_e64>; | ||
def : IMAD32_Mul24_Pat<V_MAD_U64_U32_e64>; | ||
defm : IMAD32_Mul24_Pats<V_MAD_U64_U32_e64>; | ||
} | ||
let OtherPredicates = [HasMADIntraFwdBug], SubtargetPredicate = isGFX11Only in { | ||
defm : IMAD32_Pats<V_MAD_U64_U32_gfx11_e64>; | ||
def : IMAD32_Mul24_Pat<V_MAD_U64_U32_gfx11_e64>; | ||
defm : IMAD32_Mul24_Pats<V_MAD_U64_U32_gfx11_e64>; | ||
} | ||
|
||
def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You shouldn't need to explicitly commute the patterns; the pattern generator should do this for commutable nodes.