diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 9aec90120d8b0..a9751ab03e20e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -243,10 +243,11 @@ class PointerReplacer { void replacePointer(Value *V); private: + bool collectUsersRecursive(Instruction &I); void replace(Instruction *I); - Value *getReplacement(Value *V) const { return WorkMap.lookup(V); } + Value *getReplacement(Value *I); bool isAvailable(Instruction *I) const { - return I == &Root || UsersToReplace.contains(I); + return I == &Root || Worklist.contains(I); } bool isEqualOrValidAddrSpaceCast(const Instruction *I, @@ -258,7 +259,8 @@ class PointerReplacer { return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS); } - SmallSetVector UsersToReplace; + SmallPtrSet ValuesToRevisit; + SmallSetVector Worklist; MapVector WorkMap; InstCombinerImpl &IC; Instruction &Root; @@ -267,79 +269,72 @@ class PointerReplacer { } // end anonymous namespace bool PointerReplacer::collectUsers() { - SmallVector Worklist; - SmallSetVector ValuesToRevisit; - - auto PushUsersToWorklist = [&](Instruction *Inst) { - for (auto *U : Inst->users()) - if (auto *I = dyn_cast(U)) - if (!isAvailable(I) && !ValuesToRevisit.contains(I)) - Worklist.emplace_back(I); - }; + if (!collectUsersRecursive(Root)) + return false; - PushUsersToWorklist(&Root); - while (!Worklist.empty()) { - Instruction *Inst = Worklist.pop_back_val(); + // Ensure that all outstanding (indirect) users of I + // are inserted into the Worklist. Return false + // otherwise. + return llvm::set_is_subset(ValuesToRevisit, Worklist); +} + +bool PointerReplacer::collectUsersRecursive(Instruction &I) { + for (auto *U : I.users()) { + auto *Inst = cast(&*U); if (auto *Load = dyn_cast(Inst)) { if (Load->isVolatile()) return false; - UsersToReplace.insert(Load); + Worklist.insert(Load); } else if (auto *PHI = dyn_cast(Inst)) { - /// TODO: Handle poison and null pointers for PHI and select. - // If all incoming values are available, mark this PHI as - // replacable and push it's users into the worklist. - bool IsReplacable = true; - if (all_of(PHI->incoming_values(), [&](Value *V) { - if (!isa(V)) - return IsReplacable = false; - return isAvailable(cast(V)); + // All incoming values must be instructions for replacability + if (any_of(PHI->incoming_values(), + [](Value *V) { return !isa(V); })) + return false; + + // If at least one incoming value of the PHI is not in Worklist, + // store the PHI for revisiting and skip this iteration of the + // loop. + if (any_of(PHI->incoming_values(), [this](Value *V) { + return !isAvailable(cast(V)); })) { - UsersToReplace.insert(PHI); - PushUsersToWorklist(PHI); + ValuesToRevisit.insert(Inst); continue; } - // Either an incoming value is not an instruction or not all - // incoming values are available. If this PHI was already - // visited prior to this iteration, return false. - if (!IsReplacable || !ValuesToRevisit.insert(PHI)) + Worklist.insert(PHI); + if (!collectUsersRecursive(*PHI)) return false; - - // Push PHI back into the stack, followed by unavailable - // incoming values. - Worklist.emplace_back(PHI); - for (unsigned Idx = 0; Idx < PHI->getNumIncomingValues(); ++Idx) { - auto *IncomingValue = cast(PHI->getIncomingValue(Idx)); - if (UsersToReplace.contains(IncomingValue)) - continue; - if (!ValuesToRevisit.insert(IncomingValue)) - return false; - Worklist.emplace_back(IncomingValue); - } } else if (auto *SI = dyn_cast(Inst)) { - auto *TrueInst = dyn_cast(SI->getTrueValue()); - auto *FalseInst = dyn_cast(SI->getFalseValue()); - if (!TrueInst || !FalseInst) + if (!isa(SI->getTrueValue()) || + !isa(SI->getFalseValue())) return false; - UsersToReplace.insert(SI); - PushUsersToWorklist(SI); - } else if (auto *GEP = dyn_cast(Inst)) { - UsersToReplace.insert(GEP); - PushUsersToWorklist(GEP); + if (!isAvailable(cast(SI->getTrueValue())) || + !isAvailable(cast(SI->getFalseValue()))) { + ValuesToRevisit.insert(Inst); + continue; + } + Worklist.insert(SI); + if (!collectUsersRecursive(*SI)) + return false; + } else if (isa(Inst)) { + Worklist.insert(Inst); + if (!collectUsersRecursive(*Inst)) + return false; } else if (auto *MI = dyn_cast(Inst)) { if (MI->isVolatile()) return false; - UsersToReplace.insert(Inst); + Worklist.insert(Inst); } else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) { - UsersToReplace.insert(Inst); - PushUsersToWorklist(Inst); + Worklist.insert(Inst); + if (!collectUsersRecursive(*Inst)) + return false; } else if (Inst->isLifetimeStartOrEnd()) { continue; } else { // TODO: For arbitrary uses with address space mismatches, should we check // if we can introduce a valid addrspacecast? - LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *Inst << '\n'); + LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n'); return false; } } @@ -347,39 +342,7 @@ bool PointerReplacer::collectUsers() { return true; } -void PointerReplacer::replacePointer(Value *V) { - assert(cast(Root.getType()) != cast(V->getType()) && - "Invalid usage"); - WorkMap[&Root] = V; - SmallVector Worklist; - SetVector PostOrderWorklist; - SmallPtrSet Visited; - - // Perform a postorder traversal of the users of Root. - Worklist.push_back(&Root); - while (!Worklist.empty()) { - Instruction *I = Worklist.back(); - - // If I has not been processed before, push each of its - // replacable users into the worklist. - if (Visited.insert(I).second) { - for (auto *U : I->users()) { - auto *UserInst = cast(U); - if (UsersToReplace.contains(UserInst)) - Worklist.push_back(UserInst); - } - // Otherwise, users of I have already been pushed into - // the PostOrderWorklist. Push I as well. - } else { - PostOrderWorklist.insert(I); - Worklist.pop_back(); - } - } - - // Replace pointers in reverse-postorder. - for (Instruction *I : reverse(PostOrderWorklist)) - replace(I); -} +Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); } void PointerReplacer::replace(Instruction *I) { if (getReplacement(I)) @@ -401,15 +364,13 @@ void PointerReplacer::replace(Instruction *I) { // replacement (new value). WorkMap[NewI] = NewI; } else if (auto *PHI = dyn_cast(I)) { - // Create a new PHI by replacing any incoming value that is a user of the - // root pointer and has a replacement. - Value *V = WorkMap.lookup(PHI->getIncomingValue(0)); - PHI->mutateType(V ? V->getType() : PHI->getIncomingValue(0)->getType()); - for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I) { - Value *V = WorkMap.lookup(PHI->getIncomingValue(I)); - PHI->setIncomingValue(I, V ? V : PHI->getIncomingValue(I)); - } - WorkMap[PHI] = PHI; + Type *NewTy = getReplacement(PHI->getIncomingValue(0))->getType(); + auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(), + PHI->getName(), PHI->getIterator()); + for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I) + NewPHI->addIncoming(getReplacement(PHI->getIncomingValue(I)), + PHI->getIncomingBlock(I)); + WorkMap[PHI] = NewPHI; } else if (auto *GEP = dyn_cast(I)) { auto *V = getReplacement(GEP->getPointerOperand()); assert(V && "Operand not replaced"); @@ -473,6 +434,18 @@ void PointerReplacer::replace(Instruction *I) { } } +void PointerReplacer::replacePointer(Value *V) { +#ifndef NDEBUG + auto *PT = cast(Root.getType()); + auto *NT = cast(V->getType()); + assert(PT != NT && "Invalid usage"); +#endif + WorkMap[&Root] = V; + + for (Instruction *Workitem : Worklist) + replace(Workitem); +} + Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) { if (auto *I = simplifyAllocaArraySize(*this, AI, DT)) return I; diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replace-alloca.ll b/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replace-alloca.ll deleted file mode 100644 index 538cc19f9722e..0000000000000 --- a/llvm/test/Transforms/InstCombine/AMDGPU/ptr-replace-alloca.ll +++ /dev/null @@ -1,79 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=instcombine -S < %s | FileCheck %s - -%struct.type = type { [256 x <2 x i64>] } -@g1 = external hidden addrspace(3) global %struct.type, align 16 - -; This test requires the PtrReplacer to replace users in an RPO traversal. -; Furthermore, %ptr.else need not to be replaced so it must be retained in -; %ptr.sink. -define <2 x i64> @func(ptr addrspace(4) byref(%struct.type) align 16 %0, i1 %cmp.0) { -; CHECK-LABEL: define <2 x i64> @func( -; CHECK-SAME: ptr addrspace(4) byref([[STRUCT_TYPE:%.*]]) align 16 [[TMP0:%.*]], i1 [[CMP_0:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: br i1 [[CMP_0]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] -; CHECK: [[IF_THEN]]: -; CHECK-NEXT: [[VAL_THEN:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr -; CHECK-NEXT: br label %[[SINK:.*]] -; CHECK: [[IF_ELSE]]: -; CHECK-NEXT: [[PTR_ELSE:%.*]] = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16 -; CHECK-NEXT: br label %[[SINK]] -; CHECK: [[SINK]]: -; CHECK-NEXT: [[PTR_SINK:%.*]] = phi ptr [ [[PTR_ELSE]], %[[IF_ELSE]] ], [ [[VAL_THEN]], %[[IF_THEN]] ] -; CHECK-NEXT: [[VAL_SINK:%.*]] = load <2 x i64>, ptr [[PTR_SINK]], align 16 -; CHECK-NEXT: ret <2 x i64> [[VAL_SINK]] -; -entry: - %coerce = alloca %struct.type, align 16, addrspace(5) - call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 16 %coerce, ptr addrspace(4) align 16 %0, i64 4096, i1 false) - br i1 %cmp.0, label %if.then, label %if.else - -if.then: ; preds = %entry - %ptr.then = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 0 - %val.then = addrspacecast ptr addrspace(5) %ptr.then to ptr - br label %sink - -if.else: ; preds = %entry - %ptr.else = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16 - %val.else = getelementptr inbounds nuw i8, ptr %ptr.else, i64 0 - br label %sink - -sink: - %ptr.sink = phi ptr [ %val.else, %if.else ], [ %val.then, %if.then ] - %val.sink = load <2 x i64>, ptr %ptr.sink, align 16 - ret <2 x i64> %val.sink -} - -define <2 x i64> @func_phi_loop(ptr addrspace(4) byref(%struct.type) align 16 %0, i1 %cmp.0) { -; CHECK-LABEL: define <2 x i64> @func_phi_loop( -; CHECK-SAME: ptr addrspace(4) byref([[STRUCT_TYPE:%.*]]) align 16 [[TMP0:%.*]], i1 [[CMP_0:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[VAL_0:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[PTR_PHI_R:%.*]] = phi ptr [ [[PTR_1:%.*]], %[[LOOP]] ], [ [[VAL_0]], %[[ENTRY]] ] -; CHECK-NEXT: [[PTR_1]] = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16 -; CHECK-NEXT: br i1 [[CMP_0]], label %[[LOOP]], label %[[SINK:.*]] -; CHECK: [[SINK]]: -; CHECK-NEXT: [[VAL_SINK:%.*]] = load <2 x i64>, ptr [[PTR_PHI_R]], align 16 -; CHECK-NEXT: ret <2 x i64> [[VAL_SINK]] -; -entry: - %coerce = alloca %struct.type, align 16, addrspace(5) - call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) align 16 %coerce, ptr addrspace(4) align 16 %0, i64 4096, i1 false) - %ptr.0 = getelementptr inbounds i8, ptr addrspace(5) %coerce, i64 0 - %val.0 = addrspacecast ptr addrspace(5) %ptr.0 to ptr - br label %loop - -loop: - %ptr.phi = phi ptr [ %val.1, %loop ], [ %val.0, %entry ] - %ptr.1 = load ptr, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) @g1, i32 32), align 16 - %val.1 = getelementptr inbounds nuw i8, ptr %ptr.1, i64 0 - br i1 %cmp.0, label %loop, label %sink - -sink: - %val.sink = load <2 x i64>, ptr %ptr.phi, align 16 - ret <2 x i64> %val.sink -} - -declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias writeonly captures(none), ptr addrspace(4) noalias readonly captures(none), i64, i1 immarg) #0