@@ -189,8 +189,9 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
189
189
// /
190
190
// / There is no conflict when the intervals are disjoint:
191
191
// / NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
192
- void RuntimePointerChecking::insert (Loop *Lp, Value *Ptr, bool WritePtr,
193
- unsigned DepSetId, unsigned ASId,
192
+ void RuntimePointerChecking::insert (Loop *Lp, Value *Ptr, Type *AccessTy,
193
+ bool WritePtr, unsigned DepSetId,
194
+ unsigned ASId,
194
195
const ValueToValueMap &Strides,
195
196
PredicatedScalarEvolution &PSE) {
196
197
// Get the stride replaced scev.
@@ -227,8 +228,7 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
227
228
// Add the size of the pointed element to ScEnd.
228
229
auto &DL = Lp->getHeader ()->getModule ()->getDataLayout ();
229
230
Type *IdxTy = DL.getIndexType (Ptr->getType ());
230
- const SCEV *EltSizeSCEV =
231
- SE->getStoreSizeOfExpr (IdxTy, Ptr->getType ()->getPointerElementType ());
231
+ const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr (IdxTy, AccessTy);
232
232
ScEnd = SE->getAddExpr (ScEnd, EltSizeSCEV);
233
233
234
234
Pointers.emplace_back (Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
@@ -522,19 +522,19 @@ class AccessAnalysis {
522
522
: TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), PSE(PSE) {}
523
523
524
524
/// Register a load and whether it is only read from.
///
/// NOTE(review): this span is a diff rendering; the '-' lines are the
/// pre-change form and the comments below describe the '+' lines.
///
/// \p Loc supplies the pointer (Loc.Ptr) and the alias-analysis tags
/// (Loc.AATags) that are handed to the alias set tracker.
/// \p AccessTy is the type to record for this load; it is inserted into the
/// set stored in Accesses under the key MemAccessInfo(Ptr, false), so a
/// single pointer key can accumulate more than one access type.
/// \p IsReadOnly, when true, additionally adds the pointer to ReadOnlyPtr.
525
- void addLoad (MemoryLocation &Loc, bool IsReadOnly) {
525
+ void addLoad (MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
526
526
// Strip constness from Loc.Ptr; the containers used below take a
// non-const Value *.
Value *Ptr = const_cast <Value*>(Loc.Ptr );
527
527
// The size passed to the tracker is beforeOrAfterPointer(), not a size taken
// from Loc (presumably because the address varies per iteration - confirm).
AST.add (Ptr, LocationSize::beforeOrAfterPointer (), Loc.AATags );
528
- Accesses. insert ( MemAccessInfo (Ptr, false ));
528
+ Accesses[ MemAccessInfo (Ptr, false )]. insert (AccessTy );
529
529
if (IsReadOnly)
530
530
ReadOnlyPtr.insert (Ptr);
531
531
}
532
532
533
533
/// Register a store.
///
/// NOTE(review): this span is a diff rendering; the '-' lines are the
/// pre-change form and the comments below describe the '+' lines.
///
/// \p Loc supplies the pointer (Loc.Ptr) and the alias-analysis tags
/// (Loc.AATags) that are handed to the alias set tracker.
/// \p AccessTy is the type to record for this store; it is inserted into the
/// set stored in Accesses under the key MemAccessInfo(Ptr, true), so a single
/// pointer key can accumulate more than one access type.
534
- void addStore (MemoryLocation &Loc) {
534
+ void addStore (MemoryLocation &Loc, Type *AccessTy ) {
535
535
// Strip constness from Loc.Ptr; the containers used below take a
// non-const Value *.
Value *Ptr = const_cast <Value*>(Loc.Ptr );
536
536
// The size passed to the tracker is beforeOrAfterPointer(), not a size taken
// from Loc (presumably because the address varies per iteration - confirm).
AST.add (Ptr, LocationSize::beforeOrAfterPointer (), Loc.AATags );
537
- Accesses. insert ( MemAccessInfo (Ptr, true ));
537
+ Accesses[ MemAccessInfo (Ptr, true )]. insert (AccessTy );
538
538
}
539
539
540
540
// / Check if we can emit a run-time no-alias check for \p Access.
@@ -545,12 +545,11 @@ class AccessAnalysis {
545
545
// / we will attempt to use additional run-time checks in order to get
546
546
// / the bounds of the pointer.
547
547
/// \p AccessTy is the type used for the access being checked; the visible
/// parts of the definition forward it to isNoWrap() and to
/// RuntimePointerChecking::insert().
///
/// NOTE(review): this declaration names the second-to-last parameter
/// ShouldCheckStride, while the definition's body tests a name
/// ShouldCheckWrap - presumably the same parameter; confirm and consider
/// aligning the two names.
bool createCheckForAccess (RuntimePointerChecking &RtCheck,
548
- MemAccessInfo Access,
548
+ MemAccessInfo Access, Type *AccessTy,
549
549
const ValueToValueMap &Strides,
550
550
DenseMap<Value *, unsigned > &DepSetId,
551
551
Loop *TheLoop, unsigned &RunningDepId,
552
- unsigned ASId, bool ShouldCheckStride,
553
- bool Assume);
552
+ unsigned ASId, bool ShouldCheckStride, bool Assume);
554
553
555
554
// / Check whether we can check the pointers at runtime for
556
555
// / non-intersection.
@@ -583,14 +582,15 @@ class AccessAnalysis {
583
582
MemAccessInfoList &getDependenciesToCheck () { return CheckDeps; }
584
583
585
584
private:
586
- typedef SetVector <MemAccessInfo> PtrAccessSet ;
585
+ typedef MapVector <MemAccessInfo, SmallSetVector<Type *, 1 >> PtrAccessMap ;
587
586
588
587
// / Go over all memory access and check whether runtime pointer checks
589
588
// / are needed and build sets of dependency check candidates.
590
589
void processMemAccesses ();
591
590
592
- // / Set of all accesses.
593
- PtrAccessSet Accesses;
591
+ // / Map of all accesses. Values are the types used to access memory pointed to
592
+ // / by the pointer.
593
+ PtrAccessMap Accesses;
594
594
595
595
// / The loop being checked.
596
596
const Loop *TheLoop;
@@ -652,12 +652,12 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
652
652
653
653
// / Check whether a pointer address cannot wrap.
654
654
static bool isNoWrap (PredicatedScalarEvolution &PSE,
655
- const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
655
+ const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy,
656
+ Loop *L) {
656
657
const SCEV *PtrScev = PSE.getSCEV (Ptr);
657
658
if (PSE.getSE ()->isLoopInvariant (PtrScev, L))
658
659
return true ;
659
660
660
- Type *AccessTy = Ptr->getType ()->getPointerElementType ();
661
661
int64_t Stride = getPtrStride (PSE, AccessTy, Ptr, L, Strides);
662
662
if (Stride == 1 || PSE.hasNoOverflow (Ptr, SCEVWrapPredicate::IncrementNUSW))
663
663
return true ;
@@ -689,7 +689,7 @@ static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
689
689
}
690
690
691
691
bool AccessAnalysis::createCheckForAccess (RuntimePointerChecking &RtCheck,
692
- MemAccessInfo Access,
692
+ MemAccessInfo Access, Type *AccessTy,
693
693
const ValueToValueMap &StridesMap,
694
694
DenseMap<Value *, unsigned > &DepSetId,
695
695
Loop *TheLoop, unsigned &RunningDepId,
@@ -702,7 +702,7 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
702
702
703
703
// When we run after a failing dependency check we have to make sure
704
704
// we don't have wrapping pointers.
705
- if (ShouldCheckWrap && !isNoWrap (PSE, StridesMap, Ptr, TheLoop)) {
705
+ if (ShouldCheckWrap && !isNoWrap (PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
706
706
auto *Expr = PSE.getSCEV (Ptr);
707
707
if (!Assume || !isa<SCEVAddRecExpr>(Expr))
708
708
return false ;
@@ -723,11 +723,11 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
723
723
DepId = RunningDepId++;
724
724
725
725
bool IsWrite = Access.getInt ();
726
- RtCheck.insert (TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
726
+ RtCheck.insert (TheLoop, Ptr, AccessTy, IsWrite, DepId, ASId, StridesMap, PSE);
727
727
LLVM_DEBUG (dbgs () << " LAA: Found a runtime check ptr:" << *Ptr << ' \n ' );
728
728
729
729
return true ;
730
- }
730
+ }
731
731
732
732
bool AccessAnalysis::canCheckPtrAtRT (RuntimePointerChecking &RtCheck,
733
733
ScalarEvolution *SE, Loop *TheLoop,
@@ -788,12 +788,15 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
788
788
}
789
789
790
790
for (auto &Access : AccessInfos) {
791
- if (!createCheckForAccess (RtCheck, Access, StridesMap, DepSetId, TheLoop,
792
- RunningDepId, ASId, ShouldCheckWrap, false )) {
793
- LLVM_DEBUG (dbgs () << " LAA: Can't find bounds for ptr:"
794
- << *Access.getPointer () << ' \n ' );
795
- Retries.push_back (Access);
796
- CanDoAliasSetRT = false ;
791
+ for (auto &AccessTy : Accesses[Access]) {
792
+ if (!createCheckForAccess (RtCheck, Access, AccessTy, StridesMap,
793
+ DepSetId, TheLoop, RunningDepId, ASId,
794
+ ShouldCheckWrap, false )) {
795
+ LLVM_DEBUG (dbgs () << " LAA: Can't find bounds for ptr:"
796
+ << *Access.getPointer () << ' \n ' );
797
+ Retries.push_back (Access);
798
+ CanDoAliasSetRT = false ;
799
+ }
797
800
}
798
801
}
799
802
@@ -815,13 +818,16 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
815
818
// We know that we need these checks, so we can now be more aggressive
816
819
// and add further checks if required (overflow checks).
817
820
CanDoAliasSetRT = true ;
818
- for (auto Access : Retries)
819
- if (!createCheckForAccess (RtCheck, Access, StridesMap, DepSetId,
820
- TheLoop, RunningDepId, ASId,
821
- ShouldCheckWrap, /* Assume=*/ true )) {
822
- CanDoAliasSetRT = false ;
823
- break ;
821
+ for (auto Access : Retries) {
822
+ for (auto &AccessTy : Accesses[Access]) {
823
+ if (!createCheckForAccess (RtCheck, Access, AccessTy, StridesMap,
824
+ DepSetId, TheLoop, RunningDepId, ASId,
825
+ ShouldCheckWrap, /* Assume=*/ true )) {
826
+ CanDoAliasSetRT = false ;
827
+ break ;
828
+ }
824
829
}
830
+ }
825
831
}
826
832
827
833
CanDoRT &= CanDoAliasSetRT;
@@ -886,9 +892,12 @@ void AccessAnalysis::processMemAccesses() {
886
892
LLVM_DEBUG (dbgs () << " LAA: Accesses(" << Accesses.size () << " ):\n " );
887
893
LLVM_DEBUG ({
888
894
for (auto A : Accesses)
889
- dbgs () << " \t " << *A.getPointer () << " (" <<
890
- (A.getInt () ? " write" : (ReadOnlyPtr.count (A.getPointer ()) ?
891
- " read-only" : " read" )) << " )\n " ;
895
+ dbgs () << " \t " << *A.first .getPointer () << " ("
896
+ << (A.first .getInt ()
897
+ ? " write"
898
+ : (ReadOnlyPtr.count (A.first .getPointer ()) ? " read-only"
899
+ : " read" ))
900
+ << " )\n " ;
892
901
});
893
902
894
903
// The AliasSetTracker has nicely partitioned our pointers by metadata
@@ -907,24 +916,24 @@ void AccessAnalysis::processMemAccesses() {
907
916
UnderlyingObjToAccessMap ObjToLastAccess;
908
917
909
918
// Set of access to check after all writes have been processed.
910
- PtrAccessSet DeferredAccesses;
919
+ PtrAccessMap DeferredAccesses;
911
920
912
921
// Iterate over each alias set twice, once to process read/write pointers,
913
922
// and then to process read-only pointers.
914
923
for (int SetIteration = 0 ; SetIteration < 2 ; ++SetIteration) {
915
924
bool UseDeferred = SetIteration > 0 ;
916
- PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
925
+ PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
917
926
918
927
for (const auto &AV : AS) {
919
928
Value *Ptr = AV.getValue ();
920
929
921
930
// For a single memory access in AliasSetTracker, Accesses may contain
922
931
// both read and write, and they both need to be handled for CheckDeps.
923
932
for (const auto &AC : S) {
924
- if (AC.getPointer () != Ptr)
933
+ if (AC.first . getPointer () != Ptr)
925
934
continue ;
926
935
927
- bool IsWrite = AC.getInt ();
936
+ bool IsWrite = AC.first . getInt ();
928
937
929
938
// If we're using the deferred access set, then it contains only
930
939
// reads.
@@ -946,7 +955,9 @@ void AccessAnalysis::processMemAccesses() {
946
955
// consecutive as "read-only" pointers (so that we check
947
956
// "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
948
957
if (!UseDeferred && IsReadOnlyPtr) {
949
- DeferredAccesses.insert (Access);
958
+ // We only use the pointer keys, the types vector values don't
959
+ // matter.
960
+ DeferredAccesses.insert ({Access, {}});
950
961
continue ;
951
962
}
952
963
@@ -1518,8 +1529,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
1518
1529
Value *BPtr = B.getPointer ();
1519
1530
bool AIsWrite = A.getInt ();
1520
1531
bool BIsWrite = B.getInt ();
1521
- Type *ATy = APtr-> getType ()-> getPointerElementType ( );
1522
- Type *BTy = BPtr-> getType ()-> getPointerElementType ( );
1532
+ Type *ATy = getLoadStoreType (InstMap[AIdx] );
1533
+ Type *BTy = getLoadStoreType (InstMap[BIdx] );
1523
1534
1524
1535
// Two reads are independent.
1525
1536
if (!AIsWrite && !BIsWrite)
@@ -1842,8 +1853,6 @@ bool LoopAccessInfo::canAnalyzeLoop() {
1842
1853
void LoopAccessInfo::analyzeLoop (AAResults *AA, LoopInfo *LI,
1843
1854
const TargetLibraryInfo *TLI,
1844
1855
DominatorTree *DT) {
1845
- typedef SmallPtrSet<Value*, 16 > ValueSet;
1846
-
1847
1856
// Holds the Load and Store instructions.
1848
1857
SmallVector<LoadInst *, 16 > Loads;
1849
1858
SmallVector<StoreInst *, 16 > Stores;
@@ -1975,11 +1984,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
1975
1984
// for read and once for write, it will only appear once (on the write
1976
1985
// list). This is okay, since we are going to check for conflicts between
1977
1986
// writes and between reads and writes, but not between reads and reads.
1978
- ValueSet Seen;
1987
+ SmallSet<std::pair<Value *, Type *>, 16 > Seen;
1979
1988
1980
1989
// Record uniform store addresses to identify if we have multiple stores
1981
1990
// to the same address.
1982
- ValueSet UniformStores;
1991
+ SmallPtrSet<Value *, 16 > UniformStores;
1983
1992
1984
1993
for (StoreInst *ST : Stores) {
1985
1994
Value *Ptr = ST->getPointerOperand ();
@@ -1990,7 +1999,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
1990
1999
1991
2000
// If we did *not* see this pointer before, insert it to the read-write
1992
2001
// list. At this phase it is only a 'write' list.
1993
- if (Seen.insert (Ptr).second ) {
2002
+ Type *AccessTy = getLoadStoreType (ST);
2003
+ if (Seen.insert ({Ptr, AccessTy}).second ) {
1994
2004
++NumReadWrites;
1995
2005
1996
2006
MemoryLocation Loc = MemoryLocation::get (ST);
@@ -2001,9 +2011,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
2001
2011
Loc.AATags .TBAA = nullptr ;
2002
2012
2003
2013
visitPointers (const_cast <Value *>(Loc.Ptr ), *TheLoop,
2004
- [&Accesses, Loc](Value *Ptr) {
2014
+ [&Accesses, AccessTy, Loc](Value *Ptr) {
2005
2015
MemoryLocation NewLoc = Loc.getWithNewPtr (Ptr);
2006
- Accesses.addStore (NewLoc);
2016
+ Accesses.addStore (NewLoc, AccessTy );
2007
2017
});
2008
2018
}
2009
2019
}
@@ -2027,7 +2037,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
2027
2037
// read a few words, modify, and write a few words, and some of the
2028
2038
// words may be written to the same address.
2029
2039
bool IsReadOnlyPtr = false ;
2030
- if (Seen.insert (Ptr).second ||
2040
+ Type *AccessTy = getLoadStoreType (LD);
2041
+ if (Seen.insert ({Ptr, AccessTy}).second ||
2031
2042
!getPtrStride (*PSE, LD->getType (), Ptr, TheLoop, SymbolicStrides)) {
2032
2043
++NumReads;
2033
2044
IsReadOnlyPtr = true ;
@@ -2049,9 +2060,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
2049
2060
Loc.AATags .TBAA = nullptr ;
2050
2061
2051
2062
visitPointers (const_cast <Value *>(Loc.Ptr ), *TheLoop,
2052
- [&Accesses, Loc, IsReadOnlyPtr](Value *Ptr) {
2063
+ [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
2053
2064
MemoryLocation NewLoc = Loc.getWithNewPtr (Ptr);
2054
- Accesses.addLoad (NewLoc, IsReadOnlyPtr);
2065
+ Accesses.addLoad (NewLoc, AccessTy, IsReadOnlyPtr);
2055
2066
});
2056
2067
}
2057
2068
0 commit comments