Skip to content

Commit b8ebc11

Browse files
committed
[EarlyCSE] avoid crashing when detecting min/max/abs patterns (PR41083)
As discussed in PR41083: https://bugs.llvm.org/show_bug.cgi?id=41083 ...we can assert/crash in EarlyCSE using the current hashing scheme and instructions with flags. ValueTracking's matchSelectPattern() may rely on overflow (nsw, etc) or other flags when detecting patterns such as min/max/abs composed of compare+select. But the value numbering / hashing mechanism used by EarlyCSE intersects those flags to allow more CSE. Several alternatives to solve this are discussed in the bug report. This patch avoids the issue by doing simple matching of min/max/abs patterns that never requires instruction flags. We give up some CSE power because of that, but that is not expected to result in much actual performance difference because InstCombine will canonicalize these patterns when possible. It even has this comment for abs/nabs: /// Canonicalize all these variants to 1 pattern. /// This makes CSE more likely. (And this patch adds PhaseOrdering tests to verify that the expected transforms are still happening in the standard optimization pipelines. I left this code to use ValueTracking's "flavor" enum values, so we don't have to change the callers' code. If we decide to go back to using the ValueTracking call (by changing the hashing algorithm instead), it should be obvious how to replace this chunk. Differential Revision: https://reviews.llvm.org/D74285
1 parent 8b81ebf commit b8ebc11

File tree

3 files changed

+134
-15
lines changed

3 files changed

+134
-15
lines changed

llvm/lib/Transforms/Scalar/EarlyCSE.cpp

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -152,13 +152,50 @@ static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
152152
std::swap(A, B);
153153
}
154154

155-
// Set flavor if we find a match, or set it to unknown otherwise; in
156-
// either case, return true to indicate that this is a select we can
157-
// process.
158-
if (auto *CmpI = dyn_cast<ICmpInst>(Cond))
159-
Flavor = matchDecomposedSelectPattern(CmpI, A, B, A, B).Flavor;
160-
else
161-
Flavor = SPF_UNKNOWN;
155+
// Match canonical forms of abs/nabs/min/max. We are not using ValueTracking's
156+
// more powerful matchSelectPattern() because it may rely on instruction flags
157+
// such as "nsw". That would be incompatible with the current hashing
158+
// mechanism that may remove flags to increase the likelihood of CSE.
159+
160+
// These are the canonical forms of abs(X) and nabs(X) created by instcombine:
161+
// %N = sub i32 0, %X
162+
// %C = icmp slt i32 %X, 0
163+
// %ABS = select i1 %C, i32 %N, i32 %X
164+
//
165+
// %N = sub i32 0, %X
166+
// %C = icmp slt i32 %X, 0
167+
// %NABS = select i1 %C, i32 %X, i32 %N
168+
Flavor = SPF_UNKNOWN;
169+
CmpInst::Predicate Pred;
170+
if (match(Cond, m_ICmp(Pred, m_Specific(B), m_ZeroInt())) &&
171+
Pred == ICmpInst::ICMP_SLT && match(A, m_Neg(m_Specific(B)))) {
172+
// ABS: B < 0 ? -B : B
173+
Flavor = SPF_ABS;
174+
return true;
175+
}
176+
if (match(Cond, m_ICmp(Pred, m_Specific(A), m_ZeroInt())) &&
177+
Pred == ICmpInst::ICMP_SLT && match(B, m_Neg(m_Specific(A)))) {
178+
// NABS: A < 0 ? A : -A
179+
Flavor = SPF_NABS;
180+
return true;
181+
}
182+
183+
if (!match(Cond, m_ICmp(Pred, m_Specific(A), m_Specific(B)))) {
184+
// Check for commuted variants of min/max by swapping predicate.
185+
// If we do not match the standard or commuted patterns, this is not a
186+
// recognized form of min/max, but it is still a select, so return true.
187+
if (!match(Cond, m_ICmp(Pred, m_Specific(B), m_Specific(A))))
188+
return true;
189+
Pred = ICmpInst::getSwappedPredicate(Pred);
190+
}
191+
192+
switch (Pred) {
193+
case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break;
194+
case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
195+
case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
196+
case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
197+
default: break;
198+
}
162199

163200
return true;
164201
}

llvm/test/Transforms/EarlyCSE/commute.ll

Lines changed: 79 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -279,14 +279,18 @@ define i1 @umax_inverted(i8 %a, i8 %b) {
279279
}
280280

281281
; Min/max may exist with non-canonical operands. Value tracking can match those.
282+
; But we do not use value tracking, so we expect instcombine will canonicalize
283+
; this code to a form that allows CSE.
282284

283285
define i8 @smax_nsw(i8 %a, i8 %b) {
284286
; CHECK-LABEL: @smax_nsw(
285287
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i8 [[A:%.*]], [[B:%.*]]
286288
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i8 [[A]], [[B]]
287289
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i8 [[SUB]], 0
288290
; CHECK-NEXT: [[M1:%.*]] = select i1 [[CMP1]], i8 0, i8 [[SUB]]
289-
; CHECK-NEXT: ret i8 0
291+
; CHECK-NEXT: [[M2:%.*]] = select i1 [[CMP2]], i8 [[SUB]], i8 0
292+
; CHECK-NEXT: [[R:%.*]] = sub i8 [[M2]], [[M1]]
293+
; CHECK-NEXT: ret i8 [[R]]
290294
;
291295
%sub = sub nsw i8 %a, %b
292296
%cmp1 = icmp slt i8 %a, %b
@@ -297,13 +301,19 @@ define i8 @smax_nsw(i8 %a, i8 %b) {
297301
ret i8 %r
298302
}
299303

304+
; Abs/nabs may exist with non-canonical operands. Value tracking can match those.
305+
; But we do not use value tracking, so we expect instcombine will canonicalize
306+
; this code to a form that allows CSE.
307+
300308
define i8 @abs_swapped(i8 %a) {
301309
; CHECK-LABEL: @abs_swapped(
302310
; CHECK-NEXT: [[NEG:%.*]] = sub i8 0, [[A:%.*]]
303311
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i8 [[A]], 0
304312
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i8 [[A]], 0
305313
; CHECK-NEXT: [[M1:%.*]] = select i1 [[CMP1]], i8 [[A]], i8 [[NEG]]
306-
; CHECK-NEXT: ret i8 [[M1]]
314+
; CHECK-NEXT: [[M2:%.*]] = select i1 [[CMP2]], i8 [[NEG]], i8 [[A]]
315+
; CHECK-NEXT: [[R:%.*]] = or i8 [[M2]], [[M1]]
316+
; CHECK-NEXT: ret i8 [[R]]
307317
;
308318
%neg = sub i8 0, %a
309319
%cmp1 = icmp sgt i8 %a, 0
@@ -331,13 +341,19 @@ define i8 @abs_inverted(i8 %a) {
331341
ret i8 %r
332342
}
333343

344+
; Abs/nabs may exist with non-canonical operands. Value tracking can match those.
345+
; But we do not use value tracking, so we expect instcombine will canonicalize
346+
; this code to a form that allows CSE.
347+
334348
define i8 @nabs_swapped(i8 %a) {
335349
; CHECK-LABEL: @nabs_swapped(
336350
; CHECK-NEXT: [[NEG:%.*]] = sub i8 0, [[A:%.*]]
337351
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i8 [[A]], 0
338352
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i8 [[A]], 0
339353
; CHECK-NEXT: [[M1:%.*]] = select i1 [[CMP1]], i8 [[A]], i8 [[NEG]]
340-
; CHECK-NEXT: ret i8 0
354+
; CHECK-NEXT: [[M2:%.*]] = select i1 [[CMP2]], i8 [[NEG]], i8 [[A]]
355+
; CHECK-NEXT: [[R:%.*]] = xor i8 [[M2]], [[M1]]
356+
; CHECK-NEXT: ret i8 [[R]]
341357
;
342358
%neg = sub i8 0, %a
343359
%cmp1 = icmp slt i8 %a, 0
@@ -365,15 +381,20 @@ define i8 @nabs_inverted(i8 %a) {
365381
ret i8 %r
366382
}
367383

368-
; These two tests make sure we still consider it a match when the RHS of the
384+
; Abs/nabs may exist with non-canonical operands. Value tracking can match those.
385+
; But we do not use value tracking, so we expect instcombine will canonicalize
386+
; this code to a form that allows CSE.
387+
369388
; compares are different.
370389
define i8 @abs_different_constants(i8 %a) {
371390
; CHECK-LABEL: @abs_different_constants(
372391
; CHECK-NEXT: [[NEG:%.*]] = sub i8 0, [[A:%.*]]
373392
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i8 [[A]], -1
374393
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i8 [[A]], 0
375394
; CHECK-NEXT: [[M1:%.*]] = select i1 [[CMP1]], i8 [[A]], i8 [[NEG]]
376-
; CHECK-NEXT: ret i8 [[M1]]
395+
; CHECK-NEXT: [[M2:%.*]] = select i1 [[CMP2]], i8 [[NEG]], i8 [[A]]
396+
; CHECK-NEXT: [[R:%.*]] = or i8 [[M2]], [[M1]]
397+
; CHECK-NEXT: ret i8 [[R]]
377398
;
378399
%neg = sub i8 0, %a
379400
%cmp1 = icmp sgt i8 %a, -1
@@ -384,13 +405,19 @@ define i8 @abs_different_constants(i8 %a) {
384405
ret i8 %r
385406
}
386407

408+
; Abs/nabs may exist with non-canonical operands. Value tracking can match those.
409+
; But we do not use value tracking, so we expect instcombine will canonicalize
410+
; this code to a form that allows CSE.
411+
387412
define i8 @nabs_different_constants(i8 %a) {
388413
; CHECK-LABEL: @nabs_different_constants(
389414
; CHECK-NEXT: [[NEG:%.*]] = sub i8 0, [[A:%.*]]
390415
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i8 [[A]], 0
391416
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i8 [[A]], -1
392417
; CHECK-NEXT: [[M1:%.*]] = select i1 [[CMP1]], i8 [[A]], i8 [[NEG]]
393-
; CHECK-NEXT: ret i8 0
418+
; CHECK-NEXT: [[M2:%.*]] = select i1 [[CMP2]], i8 [[NEG]], i8 [[A]]
419+
; CHECK-NEXT: [[R:%.*]] = xor i8 [[M2]], [[M1]]
420+
; CHECK-NEXT: ret i8 [[R]]
394421
;
395422
%neg = sub i8 0, %a
396423
%cmp1 = icmp slt i8 %a, 0
@@ -689,3 +716,49 @@ define void @not_not_min(i32* %px, i32* %py, i32* %pout) {
689716

690717
ret void
691718
}
719+
720+
; This would cause an assert/crash because we matched
721+
; a ValueTracking select pattern that required 'nsw'
722+
; on an operand, but we remove that flag as part of
723+
; CSE matching/hashing.
724+
725+
define void @PR41083_1(i32 %span_left, i32 %clip_left) {
726+
; CHECK-LABEL: @PR41083_1(
727+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CLIP_LEFT:%.*]], [[SPAN_LEFT:%.*]]
728+
; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[CLIP_LEFT]], [[SPAN_LEFT]]
729+
; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0
730+
; CHECK-NEXT: ret void
731+
;
732+
%cmp = icmp sgt i32 %clip_left, %span_left
733+
%sub = sub nsw i32 %clip_left, %span_left
734+
%cond = select i1 %cmp, i32 %sub, i32 0
735+
%cmp83292 = icmp slt i32 %cond, undef
736+
%sub2 = sub i32 %clip_left, %span_left
737+
%sel2 = select i1 %cmp, i32 %sub2, i32 0
738+
ret void
739+
}
740+
741+
; This would cause an assert/crash because we matched
742+
; a ValueTracking select pattern that required 'nsw'
743+
; on an operand, but we remove that flag as part of
744+
; CSE matching/hashing.
745+
746+
define i32 @PR41083_2(i32 %p) {
747+
; CHECK-LABEL: @PR41083_2(
748+
; CHECK-NEXT: [[S:%.*]] = sub i32 0, [[P:%.*]]
749+
; CHECK-NEXT: [[A:%.*]] = ashr exact i32 [[S]], 2
750+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 0, [[A]]
751+
; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[A]]
752+
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0
753+
; CHECK-NEXT: [[M:%.*]] = mul i32 [[SEL]], [[SUB]]
754+
; CHECK-NEXT: ret i32 [[M]]
755+
;
756+
%s = sub i32 0, %p
757+
%a = ashr exact i32 %s, 2
758+
%cmp = icmp sgt i32 0, %a
759+
%sub = sub nsw i32 0, %a
760+
%sel = select i1 %cmp, i32 %sub, i32 0
761+
%s2 = sub i32 0, %a
762+
%m = mul i32 %sel, %s2
763+
ret i32 %m
764+
}

llvm/test/Transforms/PhaseOrdering/min-max-abs-cse.ll

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,24 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: opt < %s -S -O1 | FileCheck %s
3-
; RUN: opt -passes='default<O1>' -S < %s | FileCheck %s
3+
; RUN: opt -passes='default<O1>' -S < %s | FileCheck %s
44

55
; In all tests, expect instcombine to canonicalize the select patterns
66
; for min/max/abs to allow CSE and subsequent simplification.
77

88
; sub (smax a,b), (smax a,b) --> 0
99

10+
; FIXME: We should canonicalize min/max to a form
11+
; where the cmp operands match the select operands.
12+
1013
define i8 @smax_nsw(i8 %a, i8 %b) {
1114
; CHECK-LABEL: @smax_nsw(
12-
; CHECK-NEXT: ret i8 0
15+
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i8 [[A:%.*]], [[B:%.*]]
16+
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i8 [[A]], [[B]]
17+
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i8 [[SUB]], 0
18+
; CHECK-NEXT: [[M1:%.*]] = select i1 [[CMP1]], i8 0, i8 [[SUB]]
19+
; CHECK-NEXT: [[M2:%.*]] = select i1 [[CMP2]], i8 [[SUB]], i8 0
20+
; CHECK-NEXT: [[R:%.*]] = sub nsw i8 [[M2]], [[M1]]
21+
; CHECK-NEXT: ret i8 [[R]]
1322
;
1423
%sub = sub nsw i8 %a, %b
1524
%cmp1 = icmp slt i8 %a, %b

0 commit comments

Comments
 (0)