Skip to content

Commit f57e87c

Browse files
committed
[InstCombine] Iterative replacement in PtrReplacer
This patch enhances the PtrReplacer as follows: 1. Users are now collected iteratively, keeping stack usage bounded. PHIs with incoming values that have not yet been visited are pushed back onto the stack for reconsideration. 2. Users of the pointer root are replaced in a reverse-postorder traversal instead of a simple traversal over the collected users. This ordering ensures that an instruction's operands are replaced before the instruction itself. 3. When replacing a PHI, reuse an incoming value unchanged if it has no replacement. This specifically fixes the case where an incoming value of a PHI is addrspacecasted.
1 parent 98194fd commit f57e87c

File tree

2 files changed

+142
-68
lines changed

2 files changed

+142
-68
lines changed

llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

Lines changed: 106 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -244,11 +244,10 @@ class PointerReplacer {
244244
void replacePointer(Value *V);
245245

246246
private:
247-
bool collectUsersRecursive(Instruction &I);
248247
void replace(Instruction *I);
249-
Value *getReplacement(Value *I);
248+
Value *getReplacement(Value *V) const { return WorkMap.lookup(V); }
250249
bool isAvailable(Instruction *I) const {
251-
return I == &Root || Worklist.contains(I);
250+
return I == &Root || UsersToReplace.contains(I);
252251
}
253252

254253
bool isEqualOrValidAddrSpaceCast(const Instruction *I,
@@ -260,8 +259,7 @@ class PointerReplacer {
260259
return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS);
261260
}
262261

263-
SmallPtrSet<Instruction *, 32> ValuesToRevisit;
264-
SmallSetVector<Instruction *, 4> Worklist;
262+
SmallSetVector<Instruction *, 4> UsersToReplace;
265263
MapVector<Value *, Value *> WorkMap;
266264
InstCombinerImpl &IC;
267265
Instruction &Root;
@@ -270,80 +268,128 @@ class PointerReplacer {
270268
} // end anonymous namespace
271269

272270
bool PointerReplacer::collectUsers() {
273-
if (!collectUsersRecursive(Root))
274-
return false;
275-
276-
// Ensure that all outstanding (indirect) users of I
277-
// are inserted into the Worklist. Return false
278-
// otherwise.
279-
return llvm::set_is_subset(ValuesToRevisit, Worklist);
280-
}
271+
SmallVector<Instruction *> Worklist;
272+
SmallSetVector<Instruction *, 4> ValuesToRevisit;
273+
274+
auto PushUsersToWorklist = [&](Instruction *Inst) {
275+
for (auto *U : Inst->users()) {
276+
if (auto *I = dyn_cast<Instruction>(U)) {
277+
if (!isAvailable(I) && !ValuesToRevisit.contains(I))
278+
Worklist.emplace_back(I);
279+
}
280+
}
281+
};
281282

282-
bool PointerReplacer::collectUsersRecursive(Instruction &I) {
283-
for (auto *U : I.users()) {
284-
auto *Inst = cast<Instruction>(&*U);
283+
PushUsersToWorklist(&Root);
284+
while (!Worklist.empty()) {
285+
Instruction *Inst = Worklist.pop_back_val();
286+
if (!Inst)
287+
return false;
288+
if (isAvailable(Inst))
289+
continue;
285290
if (auto *Load = dyn_cast<LoadInst>(Inst)) {
286291
if (Load->isVolatile())
287292
return false;
288-
Worklist.insert(Load);
293+
UsersToReplace.insert(Load);
289294
} else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
290295
// All incoming values must be instructions for replaceability
291296
if (any_of(PHI->incoming_values(),
292297
[](Value *V) { return !isa<Instruction>(V); }))
293298
return false;
294299

295-
// If at least one incoming value of the PHI is not in Worklist,
296-
// store the PHI for revisiting and skip this iteration of the
297-
// loop.
298-
if (any_of(PHI->incoming_values(), [this](Value *V) {
299-
return !isAvailable(cast<Instruction>(V));
300-
})) {
301-
ValuesToRevisit.insert(Inst);
300+
// If all incoming values are available, mark this PHI as
301+
// replaceable and push its users into the worklist.
302+
if (all_of(PHI->incoming_values(),
303+
[&](Value *V) { return isAvailable(cast<Instruction>(V)); })) {
304+
UsersToReplace.insert(PHI);
305+
PushUsersToWorklist(PHI);
302306
continue;
303307
}
304308

305-
Worklist.insert(PHI);
306-
if (!collectUsersRecursive(*PHI))
307-
return false;
308-
} else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
309-
if (!isa<Instruction>(SI->getTrueValue()) ||
310-
!isa<Instruction>(SI->getFalseValue()))
309+
// Not all incoming values are available. If this PHI was already
310+
// visited prior to this iteration, return false.
311+
if (!ValuesToRevisit.insert(PHI))
311312
return false;
312313

313-
if (!isAvailable(cast<Instruction>(SI->getTrueValue())) ||
314-
!isAvailable(cast<Instruction>(SI->getFalseValue()))) {
315-
ValuesToRevisit.insert(Inst);
316-
continue;
314+
// Push PHI back into the stack, followed by unavailable
315+
// incoming values.
316+
Worklist.emplace_back(PHI);
317+
for (unsigned Idx = 0; Idx < PHI->getNumIncomingValues(); ++Idx) {
318+
auto *IncomingValue = cast<Instruction>(PHI->getIncomingValue(Idx));
319+
if (UsersToReplace.contains(IncomingValue))
320+
continue;
321+
if (!ValuesToRevisit.insert(IncomingValue))
322+
return false;
323+
Worklist.emplace_back(IncomingValue);
317324
}
318-
Worklist.insert(SI);
319-
if (!collectUsersRecursive(*SI))
320-
return false;
321-
} else if (isa<GetElementPtrInst>(Inst)) {
322-
Worklist.insert(Inst);
323-
if (!collectUsersRecursive(*Inst))
325+
} else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
326+
if (isa<Constant>(SI->getTrueValue()) &&
327+
isa<Constant>(SI->getFalseValue()))
328+
continue;
329+
330+
auto *TrueInst = dyn_cast<Instruction>(SI->getTrueValue());
331+
auto *FalseInst = dyn_cast<Instruction>(SI->getFalseValue());
332+
if (!TrueInst || !FalseInst)
324333
return false;
334+
335+
UsersToReplace.insert(SI);
336+
PushUsersToWorklist(SI);
337+
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
338+
UsersToReplace.insert(GEP);
339+
PushUsersToWorklist(GEP);
325340
} else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
326341
if (MI->isVolatile())
327342
return false;
328-
Worklist.insert(Inst);
343+
UsersToReplace.insert(Inst);
329344
} else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
330-
Worklist.insert(Inst);
331-
if (!collectUsersRecursive(*Inst))
332-
return false;
345+
UsersToReplace.insert(Inst);
346+
PushUsersToWorklist(Inst);
333347
} else if (Inst->isLifetimeStartOrEnd()) {
334348
continue;
335349
} else {
336350
// TODO: For arbitrary uses with address space mismatches, should we check
337351
// if we can introduce a valid addrspacecast?
338-
LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
352+
LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *Inst << '\n');
339353
return false;
340354
}
341355
}
342356

343-
return true;
357+
return llvm::set_is_subset(ValuesToRevisit, UsersToReplace);
344358
}
345359

346-
Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); }
360+
void PointerReplacer::replacePointer(Value *V) {
361+
assert(cast<PointerType>(Root.getType()) != cast<PointerType>(V->getType()) &&
362+
"Invalid usage");
363+
WorkMap[&Root] = V;
364+
SmallVector<Instruction *> Worklist;
365+
SetVector<Instruction *> PostOrderWorklist;
366+
SmallPtrSet<Instruction *, 4> Visited;
367+
368+
// Perform a postorder traversal of the users of Root.
369+
Worklist.push_back(&Root);
370+
while (!Worklist.empty()) {
371+
Instruction *I = Worklist.back();
372+
373+
// If I has not been processed before, push each of its
374+
// replaceable users into the worklist.
375+
if (Visited.insert(I).second) {
376+
for (auto *U : I->users()) {
377+
auto *UserInst = cast<Instruction>(U);
378+
if (UsersToReplace.contains(UserInst))
379+
Worklist.push_back(UserInst);
380+
}
381+
// Otherwise, users of I have already been pushed into
382+
// the PostOrderWorklist. Push I as well.
383+
} else {
384+
PostOrderWorklist.insert(I);
385+
Worklist.pop_back();
386+
}
387+
}
388+
389+
// Replace pointers in reverse-postorder.
390+
for (Instruction *I : reverse(PostOrderWorklist))
391+
replace(I);
392+
}
347393

348394
void PointerReplacer::replace(Instruction *I) {
349395
if (getReplacement(I))
@@ -365,12 +411,20 @@ void PointerReplacer::replace(Instruction *I) {
365411
// replacement (new value).
366412
WorkMap[NewI] = NewI;
367413
} else if (auto *PHI = dyn_cast<PHINode>(I)) {
368-
Type *NewTy = getReplacement(PHI->getIncomingValue(0))->getType();
369-
auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(),
370-
PHI->getName(), PHI->getIterator());
414+
// Create a new PHI by replacing any incoming value that is a user of the
415+
// root pointer and has a replacement.
416+
SmallVector<Value *, 4> IncomingValues;
417+
for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I) {
418+
Value *V = getReplacement(PHI->getIncomingValue(I));
419+
if (!V)
420+
V = PHI->getIncomingValue(I);
421+
IncomingValues.push_back(V);
422+
}
423+
auto *NewPHI = PHINode::Create(IncomingValues[0]->getType(),
424+
PHI->getNumIncomingValues(),
425+
PHI->getName() + ".r", PHI->getIterator());
371426
for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I)
372-
NewPHI->addIncoming(getReplacement(PHI->getIncomingValue(I)),
373-
PHI->getIncomingBlock(I));
427+
NewPHI->addIncoming(IncomingValues[I], PHI->getIncomingBlock(I));
374428
WorkMap[PHI] = NewPHI;
375429
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
376430
auto *V = getReplacement(GEP->getPointerOperand());
@@ -435,18 +489,6 @@ void PointerReplacer::replace(Instruction *I) {
435489
}
436490
}
437491

438-
void PointerReplacer::replacePointer(Value *V) {
439-
#ifndef NDEBUG
440-
auto *PT = cast<PointerType>(Root.getType());
441-
auto *NT = cast<PointerType>(V->getType());
442-
assert(PT != NT && "Invalid usage");
443-
#endif
444-
WorkMap[&Root] = V;
445-
446-
for (Instruction *Workitem : Worklist)
447-
replace(Workitem);
448-
}
449-
450492
Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
451493
if (auto *I = simplifyAllocaArraySize(*this, AI, DT))
452494
return I;

llvm/test/Transforms/InstCombine/AMDGPU/ptr-replace-alloca.ll

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@
44
%struct.type = type { [256 x <2 x i64>] }
55
@g1 = external hidden addrspace(3) global %struct.type, align 16
66

7+
; This test requires the PtrReplacer to replace users in an RPO traversal.
8+
; Furthermore, %ptr.else need not be replaced, so it must be retained in
9+
; %ptr.sink.
710
define <2 x i64> @func(ptr addrspace(4) byref(%struct.type) align 16 %0, i1 %cmp.0) {
811
; CHECK-LABEL: define <2 x i64> @func(
912
; CHECK-SAME: ptr addrspace(4) byref([[STRUCT_TYPE:%.*]]) align 16 [[TMP0:%.*]], i1 [[CMP_0:%.*]]) {
1013
; CHECK-NEXT: [[ENTRY:.*:]]
11-
; CHECK-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_TYPE]], align 16, addrspace(5)
12-
; CHECK-NEXT: call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef align 16 dereferenceable(4096) [[COERCE]], ptr addrspace(4) noundef align 16 dereferenceable(4096) [[TMP0]], i64 4096, i1 false)
1314
; CHECK-NEXT: br i1 [[CMP_0]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
1415
; CHECK: [[IF_THEN]]:
15-
; CHECK-NEXT: [[VAL_THEN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
16+
; CHECK-NEXT: [[VAL_THEN:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
1617
; CHECK-NEXT: br label %[[SINK:.*]]
1718
; CHECK: [[IF_ELSE]]:
1819
; CHECK-NEXT: [[PTR_ELSE:%.*]] = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
@@ -43,5 +44,36 @@ sink:
4344
ret <2 x i64> %val.sink
4445
}
4546

46-
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
47+
define <2 x i64> @func_phi_loop(ptr addrspace(4) byref(%struct.type) align 16 %0, i1 %cmp.0) {
48+
; CHECK-LABEL: define <2 x i64> @func_phi_loop(
49+
; CHECK-SAME: ptr addrspace(4) byref([[STRUCT_TYPE:%.*]]) align 16 [[TMP0:%.*]], i1 [[CMP_0:%.*]]) {
50+
; CHECK-NEXT: [[ENTRY:.*]]:
51+
; CHECK-NEXT: [[VAL_0:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr
52+
; CHECK-NEXT: br label %[[LOOP:.*]]
53+
; CHECK: [[LOOP]]:
54+
; CHECK-NEXT: [[PTR_PHI_R:%.*]] = phi ptr [ [[PTR_1:%.*]], %[[LOOP]] ], [ [[VAL_0]], %[[ENTRY]] ]
55+
; CHECK-NEXT: [[PTR_1]] = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
56+
; CHECK-NEXT: br i1 [[CMP_0]], label %[[LOOP]], label %[[SINK:.*]]
57+
; CHECK: [[SINK]]:
58+
; CHECK-NEXT: [[VAL_SINK:%.*]] = load <2 x i64>, ptr [[PTR_PHI_R]], align 16
59+
; CHECK-NEXT: ret <2 x i64> [[VAL_SINK]]
60+
;
61+
entry:
62+
%coerce = alloca %struct.type, align 16, addrspace(5)
63+
call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 16 %coerce, ptr addrspace(4) align 16 %0, i64 4096, i1 false)
64+
%ptr.0 = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 0
65+
%val.0 = addrspacecast ptr addrspace(5) %ptr.0 to ptr
66+
br label %loop
67+
68+
loop:
69+
%ptr.phi = phi ptr [ %val.1, %loop ], [ %val.0, %entry ]
70+
%ptr.1 = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16
71+
%val.1 = getelementptr inbounds nuw i8, ptr %ptr.1, i64 0
72+
br i1 %cmp.0, label %loop, label %sink
73+
74+
sink:
75+
%val.sink = load <2 x i64>, ptr %ptr.phi, align 16
76+
ret <2 x i64> %val.sink
77+
}
78+
4779
declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias writeonly captures(none), ptr addrspace(4) noalias readonly captures(none), i64, i1 immarg) #0

0 commit comments

Comments
 (0)