-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[GlobalIsel] Combine select to integer minmax (second attempt). #77520
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -6548,6 +6548,87 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select, | |||
return false; | ||||
} | ||||
|
||||
bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select, | ||||
BuildFnTy &MatchInfo) { | ||||
Register DstReg = Select->getReg(0); | ||||
Register Cond = Select->getCondReg(); | ||||
Register True = Select->getTrueReg(); | ||||
Register False = Select->getFalseReg(); | ||||
LLT DstTy = MRI.getType(DstReg); | ||||
|
||||
// We need a G_ICMP on the condition register. | ||||
GICmp *Cmp = getOpcodeDef<GICmp>(Cond, MRI); | ||||
if (!Cmp) | ||||
return false; | ||||
|
||||
// We want to fold the icmp and replace the select. | ||||
if (!MRI.hasOneNonDBGUse(Cmp->getReg(0))) | ||||
return false; | ||||
|
||||
CmpInst::Predicate Pred = Cmp->getCond(); | ||||
// We need a larger or smaller predicate for | ||||
// canonicalization. | ||||
if (CmpInst::isEquality(Pred)) | ||||
return false; | ||||
|
||||
Register CmpLHS = Cmp->getLHSReg(); | ||||
Register CmpRHS = Cmp->getRHSReg(); | ||||
|
||||
// We can swap CmpLHS and CmpRHS for higher hitrate. | ||||
if (True == CmpRHS && False == CmpLHS) { | ||||
std::swap(CmpLHS, CmpRHS); | ||||
Pred = CmpInst::getSwappedPredicate(Pred); | ||||
} | ||||
|
||||
// (icmp X, Y) ? X : Y -> integer minmax. | ||||
// see matchSelectPattern in ValueTracking. | ||||
// Legality between G_SELECT and integer minmax can differ. | ||||
if (True == CmpLHS && False == CmpRHS) { | ||||
switch (Pred) { | ||||
case ICmpInst::ICMP_UGT: | ||||
case ICmpInst::ICMP_UGE: { | ||||
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy})) | ||||
return false; | ||||
MatchInfo = [=](MachineIRBuilder &B) { | ||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||||
B.buildUMax(DstReg, True, False); | ||||
}; | ||||
return true; | ||||
} | ||||
case ICmpInst::ICMP_SGT: | ||||
case ICmpInst::ICMP_SGE: { | ||||
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy})) | ||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The API works for combining selects: the result has the same type as the select. It fails for, e.g., combining an and/or of two icmps into one icmp.
|
||||
return false; | ||||
MatchInfo = [=](MachineIRBuilder &B) { | ||||
B.buildSMax(DstReg, True, False); | ||||
}; | ||||
return true; | ||||
} | ||||
case ICmpInst::ICMP_ULT: | ||||
case ICmpInst::ICMP_ULE: { | ||||
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy})) | ||||
return false; | ||||
MatchInfo = [=](MachineIRBuilder &B) { | ||||
B.buildUMin(DstReg, True, False); | ||||
}; | ||||
return true; | ||||
} | ||||
case ICmpInst::ICMP_SLT: | ||||
case ICmpInst::ICMP_SLE: { | ||||
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy})) | ||||
return false; | ||||
MatchInfo = [=](MachineIRBuilder &B) { | ||||
B.buildSMin(DstReg, True, False); | ||||
}; | ||||
return true; | ||||
} | ||||
default: | ||||
return false; | ||||
} | ||||
} | ||||
|
||||
return false; | ||||
} | ||||
|
||||
bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) { | ||||
GSelect *Select = cast<GSelect>(&MI); | ||||
|
||||
|
@@ -6557,5 +6638,8 @@ bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) { | |||
if (tryFoldBoolSelectToLogic(Select, MatchInfo)) | ||||
return true; | ||||
|
||||
if (tryFoldSelectToIntMinMax(Select, MatchInfo)) | ||||
return true; | ||||
|
||||
return false; | ||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2421,7 +2421,7 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) { | |
; CHECK-NOLSE-O1-NEXT: ldaxrb w8, [x0] | ||
; CHECK-NOLSE-O1-NEXT: sxtb w9, w8 | ||
; CHECK-NOLSE-O1-NEXT: cmp w9, w1, sxtb | ||
; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, le | ||
; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, lt | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. le: signed less than or equal |
||
; CHECK-NOLSE-O1-NEXT: stxrb w10, w9, [x0] | ||
; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB33_1 | ||
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end | ||
|
@@ -2435,7 +2435,7 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) { | |
; CHECK-OUTLINE-O1-NEXT: ldaxrb w8, [x0] | ||
; CHECK-OUTLINE-O1-NEXT: sxtb w9, w8 | ||
; CHECK-OUTLINE-O1-NEXT: cmp w9, w1, sxtb | ||
; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, le | ||
; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, lt | ||
; CHECK-OUTLINE-O1-NEXT: stxrb w10, w9, [x0] | ||
; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB33_1 | ||
; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end | ||
|
@@ -2662,7 +2662,7 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { | |
; CHECK-NOLSE-O1-NEXT: ldaxrb w8, [x0] | ||
; CHECK-NOLSE-O1-NEXT: and w10, w8, #0xff | ||
; CHECK-NOLSE-O1-NEXT: cmp w10, w9 | ||
; CHECK-NOLSE-O1-NEXT: csel w10, w10, w9, ls | ||
; CHECK-NOLSE-O1-NEXT: csel w10, w10, w9, lo | ||
; CHECK-NOLSE-O1-NEXT: stlxrb w11, w10, [x0] | ||
; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB35_1 | ||
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end | ||
|
@@ -2677,7 +2677,7 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) { | |
; CHECK-OUTLINE-O1-NEXT: ldaxrb w8, [x0] | ||
; CHECK-OUTLINE-O1-NEXT: and w10, w8, #0xff | ||
; CHECK-OUTLINE-O1-NEXT: cmp w10, w9 | ||
; CHECK-OUTLINE-O1-NEXT: csel w10, w10, w9, ls | ||
; CHECK-OUTLINE-O1-NEXT: csel w10, w10, w9, lo | ||
; CHECK-OUTLINE-O1-NEXT: stlxrb w11, w10, [x0] | ||
; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB35_1 | ||
; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end | ||
|
@@ -3477,7 +3477,7 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) { | |
; CHECK-NOLSE-O1-NEXT: ldaxrh w8, [x0] | ||
; CHECK-NOLSE-O1-NEXT: sxth w9, w8 | ||
; CHECK-NOLSE-O1-NEXT: cmp w9, w1, sxth | ||
; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, le | ||
; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, lt | ||
; CHECK-NOLSE-O1-NEXT: stxrh w10, w9, [x0] | ||
; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB43_1 | ||
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end | ||
|
@@ -3491,7 +3491,7 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) { | |
; CHECK-OUTLINE-O1-NEXT: ldaxrh w8, [x0] | ||
; CHECK-OUTLINE-O1-NEXT: sxth w9, w8 | ||
; CHECK-OUTLINE-O1-NEXT: cmp w9, w1, sxth | ||
; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, le | ||
; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, lt | ||
; CHECK-OUTLINE-O1-NEXT: stxrh w10, w9, [x0] | ||
; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB43_1 | ||
; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end | ||
|
@@ -3718,7 +3718,7 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { | |
; CHECK-NOLSE-O1-NEXT: ldaxrh w8, [x0] | ||
; CHECK-NOLSE-O1-NEXT: and w10, w8, #0xffff | ||
; CHECK-NOLSE-O1-NEXT: cmp w10, w9 | ||
; CHECK-NOLSE-O1-NEXT: csel w10, w10, w9, ls | ||
; CHECK-NOLSE-O1-NEXT: csel w10, w10, w9, lo | ||
; CHECK-NOLSE-O1-NEXT: stlxrh w11, w10, [x0] | ||
; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB45_1 | ||
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end | ||
|
@@ -3733,7 +3733,7 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) { | |
; CHECK-OUTLINE-O1-NEXT: ldaxrh w8, [x0] | ||
; CHECK-OUTLINE-O1-NEXT: and w10, w8, #0xffff | ||
; CHECK-OUTLINE-O1-NEXT: cmp w10, w9 | ||
; CHECK-OUTLINE-O1-NEXT: csel w10, w10, w9, ls | ||
; CHECK-OUTLINE-O1-NEXT: csel w10, w10, w9, lo | ||
; CHECK-OUTLINE-O1-NEXT: stlxrh w11, w10, [x0] | ||
; CHECK-OUTLINE-O1-NEXT: cbnz w11, LBB45_1 | ||
; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end | ||
|
@@ -4526,7 +4526,7 @@ define i32 @atomicrmw_min_i32(ptr %ptr, i32 %rhs) { | |
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 | ||
; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0] | ||
; CHECK-NOLSE-O1-NEXT: cmp w8, w1 | ||
; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, le | ||
; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, lt | ||
; CHECK-NOLSE-O1-NEXT: stxr w10, w9, [x0] | ||
; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB53_1 | ||
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end | ||
|
@@ -4539,7 +4539,7 @@ define i32 @atomicrmw_min_i32(ptr %ptr, i32 %rhs) { | |
; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 | ||
; CHECK-OUTLINE-O1-NEXT: ldaxr w8, [x0] | ||
; CHECK-OUTLINE-O1-NEXT: cmp w8, w1 | ||
; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, le | ||
; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, lt | ||
; CHECK-OUTLINE-O1-NEXT: stxr w10, w9, [x0] | ||
; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB53_1 | ||
; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end | ||
|
@@ -4754,7 +4754,7 @@ define i32 @atomicrmw_umin_i32(ptr %ptr, i32 %rhs) { | |
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 | ||
; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0] | ||
; CHECK-NOLSE-O1-NEXT: cmp w8, w1 | ||
; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, ls | ||
; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, lo | ||
; CHECK-NOLSE-O1-NEXT: stlxr w10, w9, [x0] | ||
; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB55_1 | ||
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end | ||
|
@@ -4767,7 +4767,7 @@ define i32 @atomicrmw_umin_i32(ptr %ptr, i32 %rhs) { | |
; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 | ||
; CHECK-OUTLINE-O1-NEXT: ldaxr w8, [x0] | ||
; CHECK-OUTLINE-O1-NEXT: cmp w8, w1 | ||
; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, ls | ||
; CHECK-OUTLINE-O1-NEXT: csel w9, w8, w1, lo | ||
; CHECK-OUTLINE-O1-NEXT: stlxr w10, w9, [x0] | ||
; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB55_1 | ||
; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end | ||
|
@@ -5547,7 +5547,7 @@ define i64 @atomicrmw_min_i64(ptr %ptr, i64 %rhs) { | |
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 | ||
; CHECK-NOLSE-O1-NEXT: ldaxr x8, [x0] | ||
; CHECK-NOLSE-O1-NEXT: cmp x8, x1 | ||
; CHECK-NOLSE-O1-NEXT: csel x9, x8, x1, le | ||
; CHECK-NOLSE-O1-NEXT: csel x9, x8, x1, lt | ||
; CHECK-NOLSE-O1-NEXT: stxr w10, x9, [x0] | ||
; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB63_1 | ||
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end | ||
|
@@ -5560,7 +5560,7 @@ define i64 @atomicrmw_min_i64(ptr %ptr, i64 %rhs) { | |
; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 | ||
; CHECK-OUTLINE-O1-NEXT: ldaxr x8, [x0] | ||
; CHECK-OUTLINE-O1-NEXT: cmp x8, x1 | ||
; CHECK-OUTLINE-O1-NEXT: csel x9, x8, x1, le | ||
; CHECK-OUTLINE-O1-NEXT: csel x9, x8, x1, lt | ||
; CHECK-OUTLINE-O1-NEXT: stxr w10, x9, [x0] | ||
; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB63_1 | ||
; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end | ||
|
@@ -5775,7 +5775,7 @@ define i64 @atomicrmw_umin_i64(ptr %ptr, i64 %rhs) { | |
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 | ||
; CHECK-NOLSE-O1-NEXT: ldaxr x8, [x0] | ||
; CHECK-NOLSE-O1-NEXT: cmp x8, x1 | ||
; CHECK-NOLSE-O1-NEXT: csel x9, x8, x1, ls | ||
; CHECK-NOLSE-O1-NEXT: csel x9, x8, x1, lo | ||
; CHECK-NOLSE-O1-NEXT: stlxr w10, x9, [x0] | ||
; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB65_1 | ||
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end | ||
|
@@ -5788,7 +5788,7 @@ define i64 @atomicrmw_umin_i64(ptr %ptr, i64 %rhs) { | |
; CHECK-OUTLINE-O1-NEXT: ; =>This Inner Loop Header: Depth=1 | ||
; CHECK-OUTLINE-O1-NEXT: ldaxr x8, [x0] | ||
; CHECK-OUTLINE-O1-NEXT: cmp x8, x1 | ||
; CHECK-OUTLINE-O1-NEXT: csel x9, x8, x1, ls | ||
; CHECK-OUTLINE-O1-NEXT: csel x9, x8, x1, lo | ||
; CHECK-OUTLINE-O1-NEXT: stlxr w10, x9, [x0] | ||
; CHECK-OUTLINE-O1-NEXT: cbnz w10, LBB65_1 | ||
; CHECK-OUTLINE-O1-NEXT: ; %bb.2: ; %atomicrmw.end | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
one use one icmp.