#define DEBUG_TYPE "memdep"
STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
STATISTIC(NumCacheNonLocalPtr,
          "Number of fully cached non-local ptr responses");
STATISTIC(NumCacheDirtyNonLocalPtr,
          "Number of cached, but dirty, non-local ptr responses");
STATISTIC(NumUncacheNonLocalPtr,
          "Number of uncached non-local ptr responses");
STATISTIC(NumCacheCompleteNonLocalPtr,
          "Number of block queries that were completely cached");
static cl::opt<unsigned> BlockScanLimit(
    "memdep-block-scan-limit", cl::Hidden, cl::init(100),
    cl::desc("The number of instructions to scan in a block in memory "
             "dependency analysis (default = 100)"));

static cl::opt<unsigned>
    BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(1000),
                     cl::desc("The number of blocks to scan during memory "
                              "dependency analysis (default = 1000)"));
/// This is a helper function that removes Val from 'Inst's set in ReverseMap.
/// If the set becomes empty, remove Inst's entry.
template <typename KeyTy>
static void
RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
                     Instruction *Inst, KeyTy Val) {
  typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
      ReverseMap.find(Inst);
  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
  bool Found = InstIt->second.erase(Val);
  assert(Found && "Invalid reverse map!");
  (void)Found;
  if (InstIt->second.empty())
    ReverseMap.erase(InstIt);
}
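// The reverse maps let removeInstruction() find every cached result that
// names a deleted instruction without walking all the caches. A typical use,
// mirroring the calls later in this file:
//   RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);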
/// If the given instruction references a specific memory location, fill in
/// Loc with the details, otherwise set Loc.Ptr to null.
static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
                              const TargetLibraryInfo &TLI) {
  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    if (LI->isUnordered()) {
      Loc = MemoryLocation::get(LI);
      return ModRefInfo::Ref;
    }
    // ...
  }

  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->isUnordered()) {
      Loc = MemoryLocation::get(SI);
      return ModRefInfo::Mod;
    }
    // ...
  }

  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
    Loc = MemoryLocation::get(V);
    return ModRefInfo::ModRef;
  }

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::invariant_start:
      // ...
    case Intrinsic::invariant_end:
      // ...
    case Intrinsic::masked_load:
      // ...
    case Intrinsic::masked_store:
      // ...
    }
  }
  // ...
}
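// GetLocation is the classification step for the scans below: when it can
// produce a concrete MemoryLocation, callers query alias analysis against
// that location; when it cannot, the returned ModRefInfo alone decides
// whether the scanned instruction must be treated as a potential dependency.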
MemDepResult MemoryDependenceResults::getCallDependencyFrom(
    CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
    BasicBlock *BB) {
  // ...
  // Walk backwards through the block, looking for dependencies.
  while (ScanIt != BB->begin()) {
    Instruction *Inst = &*--ScanIt;
    // Debug intrinsics don't (and can't) cause dependencies.
    if (isa<DbgInfoIntrinsic>(Inst))
      continue;
    // ...
    if (auto *CallB = dyn_cast<CallBase>(Inst)) {
      // If the two calls are identical, return Inst as a Def, so that Call
      // can be found redundant and eliminated.
      if (isReadOnlyCall && !isModSet(MR) &&
          Call->isIdenticalToWhenDefined(CallB))
        return MemDepResult::getDef(Inst);
      // ...
    }
    // ...
  }

  // No dependence found. If this is the entry block of the function, it is
  // unknown, otherwise it is non-local.
  if (BB != &BB->getParent()->getEntryBlock())
    return MemDepResult::getNonLocal();
  return MemDepResult::getNonFuncLocal();
}
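// Result convention used throughout this file: getDef()/getClobber() name a
// concrete local dependency, getNonLocal() means the answer lives in a
// predecessor block, getNonFuncLocal() means no dependency exists anywhere
// in the function, and getUnknown() means the analysis gave up (for example
// because a scan limit was hit).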
MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
    const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
    BatchAAResults &BatchAA) {
  MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
  if (QueryInst != nullptr) {
    if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
      InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
      if (InvariantGroupDependency.isDef())
        return InvariantGroupDependency;
    }
  }
  MemDepResult SimpleDep = getSimplePointerDependencyFrom(
      MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, BatchAA);
  if (SimpleDep.isDef())
    return SimpleDep;
  // A non-local invariant.group dependency indicates there is a non-local
  // Def, which is better than a local clobber and everything else.
  if (InvariantGroupDependency.isNonLocal())
    return InvariantGroupDependency;

  assert(InvariantGroupDependency.isUnknown() &&
         "InvariantGroupDependency should be only unknown at this point");
  return SimpleDep;
}
MemDepResult
MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
                                                            BasicBlock *BB) {
  if (!LI->hasMetadata(LLVMContext::MD_invariant_group))
    return MemDepResult::getUnknown();

  // Take the pointer operand from the load and look through casts. It is not
  // safe to walk the use list of a global value, because function passes
  // aren't allowed to look outside their functions.
  Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
  if (isa<GlobalValue>(LoadOperand))
    return MemDepResult::getUnknown();

  // Queue to process all pointers that are equivalent to the load operand.
  SmallVector<const Value *, 8> LoadOperandsQueue;
  LoadOperandsQueue.push_back(LoadOperand);

  Instruction *ClosestDependency = nullptr;
  // The order of instructions in a use list is unpredictable. To always get
  // the same result, keep the closest dominating candidate.
  auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
    assert(Other && "Must call it with not null instruction");
    if (Best == nullptr || DT.dominates(Best, Other))
      return Other;
    return Best;
  };

  while (!LoadOperandsQueue.empty()) {
    const Value *Ptr = LoadOperandsQueue.pop_back_val();
    assert(Ptr && !isa<GlobalValue>(Ptr) &&
           "Null or GlobalValue should not be inserted");

    for (const Use &Us : Ptr->uses()) {
      auto *U = dyn_cast<Instruction>(Us.getUser());
      if (!U || U == LI || !DT.dominates(U, LI))
        continue;

      // Bitcasts of Ptr reference the same memory; queue their users as well.
      if (isa<BitCastInst>(U)) {
        LoadOperandsQueue.push_back(U);
        continue;
      }
      // A GEP with all-zero indices is equivalent to a bitcast.
      if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
        if (GEP->hasAllZeroIndices()) {
          LoadOperandsQueue.push_back(U);
          continue;
        }

      // If we hit a load/store with the same invariant.group metadata (and
      // the same pointer operand), the pointed-to value cannot have changed.
      if ((isa<LoadInst>(U) ||
           (isa<StoreInst>(U) &&
            cast<StoreInst>(U)->getPointerOperand() == Ptr)) &&
          U->hasMetadata(LLVMContext::MD_invariant_group))
        ClosestDependency = GetClosestDependency(ClosestDependency, U);
    }
  }

  if (!ClosestDependency)
    return MemDepResult::getUnknown();
  if (ClosestDependency->getParent() == BB)
    return MemDepResult::getDef(ClosestDependency);
  // The dependency is non-local: cache it so that a later call to
  // getNonLocalPointerDependency can return it without rescanning.
  NonLocalDefsCache.try_emplace(
      LI, NonLocalDepResult(ClosestDependency->getParent(),
                            MemDepResult::getDef(ClosestDependency), nullptr));
  ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
  return MemDepResult::getNonLocal();
}
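// The IR shape this BFS recognizes looks roughly like this (hypothetical
// example):
//   store i32 42, i32* %p, !invariant.group !0
//   ...
//   %v = load i32, i32* %p, !invariant.group !0
// Both accesses carry invariant.group metadata on the same (cast-stripped)
// pointer, so the load's value is pinned to the dominating store even when
// that store sits in another basic block.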
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
    const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
    BatchAAResults &BatchAA) {
  bool isInvariantLoad = false;
  unsigned DefaultLimit = getDefaultBlockScanLimit();
  if (!Limit)
    Limit = &DefaultLimit;
  // ...
  if (isLoad && QueryInst) {
    LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
    if (LI && LI->hasMetadata(LLVMContext::MD_invariant_load))
      isInvariantLoad = true;
  }

  // True for volatile instructions. For loads/stores, true if the atomic
  // ordering is stronger than AO; for other instructions, true if they may
  // read or write memory at all.
  auto isComplexForReordering = [](Instruction *I, AtomicOrdering AO) -> bool {
    if (I->isVolatile())
      return true;
    if (auto *LI = dyn_cast<LoadInst>(I))
      return isStrongerThan(LI->getOrdering(), AO);
    if (auto *SI = dyn_cast<StoreInst>(I))
      return isStrongerThan(SI->getOrdering(), AO);
    return I->mayReadOrWriteMemory();
  };
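  // The lambda above is the bailout predicate for the scan below: volatile
  // accesses and accesses with an ordering stronger than AO cannot be freely
  // reordered with the query, so hitting one generally forces a conservative
  // answer.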
  // Walk backwards through the basic block, looking for dependencies.
  while (ScanIt != BB->begin()) {
    Instruction *Inst = &*--ScanIt;

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
      // Debug intrinsics don't (and can't) cause dependencies.
      if (isa<DbgInfoIntrinsic>(II))
        continue;
    // ...
    if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
      Intrinsic::ID ID = II->getIntrinsicID();
      switch (ID) {
      case Intrinsic::lifetime_start: {
        // ...
      }
      case Intrinsic::masked_load:
      case Intrinsic::masked_store: {
        // ...
        if (ID == Intrinsic::masked_load)
          // ...
      }
      }
    }

    // Values depend on loads if the pointers are must aliased.
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
      // ...
      if (LI->isVolatile()) {
        // ...
      }
      // ...
      ClobberOffsets[LI] = R.getOffset();
      // ...
    }
    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      // Atomic stores need special handling; another thread may touch the
      // location.
      if (!SI->isUnordered() && SI->isAtomic()) {
        // ...
      }
      // ...
      if (SI->isVolatile())
        // ...
    }
    // ...
      if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr))
        // ...
    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
      // ...
  }

  // No dependence found. If this is the entry block of the function, it is
  // unknown, otherwise it is non-local.
  if (BB != &BB->getParent()->getEntryBlock())
    return MemDepResult::getNonLocal();
  return MemDepResult::getNonFuncLocal();
}
MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
  ClobberOffsets.clear();
  // ...
  // If the cached entry is non-dirty, just return it.
  if (!LocalCache.isDirty())
    return LocalCache;
  // ...
    if (MemLoc.Ptr) {
      // If we can do a pointer scan, make it happen.
      bool isLoad = !isModSet(MR);
      if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
        isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;

      LocalCache =
          getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
                                   QueryParent, QueryInst, nullptr);
    } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
      bool isReadOnly = AA.onlyReadsMemory(QueryCall);
      LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
                                         ScanPos->getIterator(), QueryParent);
    }
  // ...
  // Remember the result so it can be invalidated efficiently later.
  if (Instruction *I = LocalCache.getInst())
    ReverseLocalDeps[I].insert(QueryInst);

  return LocalCache;
}

#ifndef NDEBUG
/// Used when -debug is specified to verify that cache arrays are properly
/// kept sorted.
static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
                         int Count = -1) {
  if (Count == -1)
    Count = Cache.size();
  assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
         "Cache isn't sorted!");
}
#endif
713 "getNonLocalCallDependency should only be used on calls with "
715 PerInstNLInfo &CacheP = NonLocalDepsMap[QueryCall];
723 if (!Cache.empty()) {
726 if (!CacheP.second) {
733 for (
auto &Entry : Cache)
734 if (Entry.getResult().isDirty())
735 DirtyBlocks.push_back(Entry.getBB());
740 ++NumCacheDirtyNonLocal;
747 ++NumUncacheNonLocal;
751 bool isReadonlyCall =
AA.onlyReadsMemory(QueryCall);
755 unsigned NumSortedEntries = Cache.
size();
759 while (!DirtyBlocks.empty()) {
763 if (!Visited.
insert(DirtyBB).second)
769 NonLocalDepInfo::iterator Entry =
772 if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
776 if (Entry != Cache.begin() + NumSortedEntries &&
777 Entry->getBB() == DirtyBB) {
780 if (!Entry->getResult().isDirty())
784 ExistingResult = &*Entry;
790 if (ExistingResult) {
794 RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
802 if (ScanPos != DirtyBB->
begin()) {
803 Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
825 ReverseNonLocalDeps[Inst].
insert(QueryCall);
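// Note the incremental invalidation scheme: a "dirty" entry still remembers
// the instruction it used to depend on, so the rescan above can start at
// that position instead of rescanning the whole block from the end.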
void MemoryDependenceResults::getNonLocalPointerDependency(
    Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
  const MemoryLocation Loc = MemoryLocation::get(QueryInst);
  bool isLoad = isa<LoadInst>(QueryInst);
  BasicBlock *FromBB = QueryInst->getParent();
  // ...
  assert(Loc.Ptr->getType()->isPointerTy() &&
         "Can't get pointer deps of a non-pointer!");

  // Check if there is a cached Def with invariant.group.
  auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
  if (NonLocalDefIt != NonLocalDefsCache.end()) {
    Result.push_back(NonLocalDefIt->second);
    ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
        .erase(QueryInst);
    NonLocalDefsCache.erase(NonLocalDefIt);
    return;
  }

  // This routine does not expect to deal with volatile or ordered accesses.
  auto isOrdered = [](Instruction *Inst) {
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
      return !LI->isUnordered();
    } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      return !SI->isUnordered();
    }
    return false;
  };
  // ...
  if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
                                  Result, Visited, true))
    return;
  // ...
}
MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
    Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
    BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries,
    BatchAAResults &BatchAA) {
  bool isInvariantLoad = false;

  if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
    isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);

  // Do a binary search to see if we already have an entry for this block in
  // the cache set. If so, find it.
  NonLocalDepInfo::iterator Entry = std::upper_bound(
      Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
  if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
    --Entry;

  NonLocalDepEntry *ExistingResult = nullptr;
  if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
    ExistingResult = &*Entry;

  // Use a cached result for an invariant load only if it records no
  // dependency at all; in that case the invariant load cannot have one
  // either.
  if (ExistingResult && isInvariantLoad &&
      !ExistingResult->getResult().isNonFuncLocal())
    ExistingResult = nullptr;

  // If we have a cached entry, and it is non-dirty, use it as the value for
  // this dependency.
  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
    ++NumCacheNonLocalPtr;
    return ExistingResult->getResult();
  }

  // Otherwise, we have to scan for the value. If we have a dirty cache
  // entry, start scanning from its position instead of the block end.
  BasicBlock::iterator ScanPos = BB->end();
  if (ExistingResult && ExistingResult->getResult().getInst()) {
    assert(ExistingResult->getResult().getInst()->getParent() == BB &&
           "Instruction invalidated?");
    ++NumCacheDirtyNonLocalPtr;
    ScanPos = ExistingResult->getResult().getInst()->getIterator();

    // Eliminating the dirty entry from 'Cache', so update the reverse info.
    ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
    RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
  } else {
    ++NumUncacheNonLocalPtr;
  }

  // Scan the block for the dependency.
  MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
                                              QueryInst, nullptr, BatchAA);
  // ...
  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
  // update MemDep when we remove instructions.
  Instruction *Inst = Dep.getInst();
  assert(Inst && "Didn't depend on anything?");
  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
  return Dep;
}
/// Sort the NonLocalDepInfo cache, given a certain number of elements in the
/// array that are already properly ordered.
static void
SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
                         unsigned NumSortedEntries) {
  switch (Cache.size() - NumSortedEntries) {
  // ...
    // Two new entries: insert the last one into place first.
    NonLocalDepEntry Val = Cache.back();
    Cache.pop_back();
    MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
        std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
    Cache.insert(Entry, Val);
  // ...
    // One new entry: insert it at the appropriate position.
    if (Cache.size() != 1) {
      NonLocalDepEntry Val = Cache.back();
      Cache.pop_back();
      MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
          std::upper_bound(Cache.begin(), Cache.end(), Val);
      Cache.insert(Entry, Val);
    }
  // ...
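// Rationale: callers typically add only one or two entries between sorts, so
// a binary-search insertion costs O(log n) instead of the O(n log n) full
// sort that the (elided) default case performs when many entries were added.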
bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
    Instruction *QueryInst, const PHITransAddr &Pointer,
    const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
    SmallVectorImpl<NonLocalDepResult> &Result,
    DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock,
    bool IsIncomplete) {
  // Look up the cached info for Pointer.
  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);

  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
  // CacheKey, this value will be inserted as the associated value; otherwise
  // it is ignored and we check that the cached size and AA tags are
  // consistent with the current query.
  NonLocalPointerInfo InitialNLPI;
  InitialNLPI.Size = Loc.Size;
  InitialNLPI.AATags = Loc.AATags;

  bool isInvariantLoad = false;
  if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
    isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);

  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
  // already exist.
  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
      NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
  NonLocalPointerInfo *CacheInfo = &Pair.first->second;
  // If we already have a cache entry for this CacheKey, we may need to do
  // some clean up when the cached size differs from the current query.
  if (!isInvariantLoad && !Pair.second) {
    if (CacheInfo->Size != Loc.Size) {
      bool ThrowOutEverything;
      if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
        // FIXME: We may be able to do better in the face of results with
        // mixed precision. We don't appear to get them in practice, though,
        // so just be conservative.
        ThrowOutEverything =
            CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
            CacheInfo->Size.getValue() < Loc.Size.getValue();
      } else {
        // For our purposes, unknown size > all others.
        ThrowOutEverything = !Loc.Size.hasValue();
      }

      if (ThrowOutEverything) {
        // The query's Size is greater than the cached one. Throw out the
        // cached data and proceed with the query at the greater size.
        CacheInfo->Pair = BBSkipFirstBlockPair();
        CacheInfo->Size = Loc.Size;
        for (auto &Entry : CacheInfo->NonLocalDeps)
          if (Instruction *Inst = Entry.getResult().getInst())
            RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
        CacheInfo->NonLocalDeps.clear();
        // The cache is cleared, so we have lost information about blocks we
        // had already visited; the cache information must be treated as
        // incomplete from here on.
        IsIncomplete = true;
      } else {
        // This query's Size is less than the cached one. Conservatively
        // restart the query using the greater size.
        return getNonLocalPointerDepFromBB(
            QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
            StartBB, Result, Visited, SkipFirstBlock, IsIncomplete);
      }
    }
    // If the query's AATags are inconsistent with the cached ones,
    // conservatively throw out the cached data and restart the query with
    // no tag if needed.
    if (CacheInfo->AATags != Loc.AATags) {
      if (CacheInfo->AATags) {
        CacheInfo->Pair = BBSkipFirstBlockPair();
        CacheInfo->AATags = AAMDNodes();
        for (auto &Entry : CacheInfo->NonLocalDeps)
          if (Instruction *Inst = Entry.getResult().getInst())
            RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
        CacheInfo->NonLocalDeps.clear();
        // As above, the cleared cache must now be treated as incomplete.
        IsIncomplete = true;
      }
      if (Loc.AATags)
        return getNonLocalPointerDepFromBB(
            QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB,
            Result, Visited, SkipFirstBlock, IsIncomplete);
    }
  }
  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;

  // If we have valid cached information for exactly the block we are
  // checking, just return it with no recomputation. Don't use cached
  // information for invariant loads since it is valid for non-invariant
  // loads only.
  if (!IsIncomplete && !isInvariantLoad &&
      CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
    // We have a fully cached result, but we must verify there are no
    // conflicting entries: any block already in the visited set must have
    // been visited with the same pointer.
    if (!Visited.empty()) {
      for (auto &Entry : *Cache) {
        DenseMap<BasicBlock *, Value *>::iterator VI =
            Visited.find(Entry.getBB());
        if (VI == Visited.end() || VI->second == Pointer.getAddr())
          continue;
        // A pointer mismatch in a block: report a clobber.
        return false;
      }
    }

    Value *Addr = Pointer.getAddr();
    for (auto &Entry : *Cache) {
      Visited.insert(std::make_pair(Entry.getBB(), Addr));
      if (Entry.getResult().isNonLocal()) {
        continue;
      }
      // ...
    }
    ++NumCacheCompleteNonLocalPtr;
    return true;
  }
  // Invariant loads don't affect the cache, so CacheInfo needs no update.
  if (!isInvariantLoad) {
    if (!IsIncomplete && Cache->empty())
      CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
    else
      CacheInfo->Pair = BBSkipFirstBlockPair();
  }

  SmallVector<BasicBlock *, 32> Worklist;
  Worklist.push_back(StartBB);
  // ...
  unsigned NumSortedEntries = Cache->size();
  unsigned WorklistEntries = BlockNumberLimit;
  bool GotWorklistLimit = false;
  // ...
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();

    // If we process a large number of blocks it becomes very expensive and
    // likely isn't worth worrying about, so bail out.
    if (Result.size() > NumResultsLimit) {
      // Sort now (if needed) so that recursive invocations of
      // getNonLocalPointerDepFromBB that could reuse the cache value will
      // only see properly sorted cache arrays.
      if (Cache && NumSortedEntries != Cache->size()) {
        SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
      }
      // Since we bail out, the "Cache" set won't contain all the results for
      // the query; clear the fast-path indicator.
      CacheInfo->Pair = BBSkipFirstBlockPair();
      return false;
    }

    // Skip the first block if we have it.
    if (!SkipFirstBlock) {
      // Analyze the dependency of *Pointer in FromBB. See if we already have
      // been here.
      assert(Visited.count(BB) && "Should check 'visited' before adding to WL");

      // Get the dependency info for Pointer in BB. If we have cached
      // information, we will use it, otherwise we compute it.
      MemDepResult Dep = getNonLocalInfoForBlock(
          QueryInst, Loc, isLoad, BB, Cache, NumSortedEntries, BatchAA);
    // If 'Pointer' is an instruction defined in this block, then we need to
    // do phi translation to change it into a value live in the predecessor
    // block. If not, we just add the predecessors to the worklist and scan
    // them with the same Pointer.
    if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
      SkipFirstBlock = false;
      SmallVector<BasicBlock *, 16> NewBlocks;
      for (BasicBlock *Pred : PredCache.get(BB)) {
        // Verify that we haven't looked at this block yet.
        std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
            Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
        if (InsertRes.second) {
          // First time we've looked at this predecessor.
          NewBlocks.push_back(Pred);
          continue;
        }

        // If we have seen this block before, but it was with a different
        // pointer, then we have a phi translation failure and have to treat
        // this as a clobber.
        if (InsertRes.first->second != Pointer.getAddr()) {
          // Make sure to clean up the Visited map before continuing on to
          // PredTranslationFailure.
          for (unsigned i = 0; i < NewBlocks.size(); i++)
            Visited.erase(NewBlocks[i]);
          goto PredTranslationFailure;
        }
      }
      if (NewBlocks.size() > WorklistEntries) {
        // Make sure to clean up the Visited map before continuing on to
        // PredTranslationFailure.
        for (unsigned i = 0; i < NewBlocks.size(); i++)
          Visited.erase(NewBlocks[i]);
        GotWorklistLimit = true;
        goto PredTranslationFailure;
      }
      WorklistEntries -= NewBlocks.size();
      Worklist.append(NewBlocks.begin(), NewBlocks.end());
      continue;
    }

    // We do need to do phi translation; if we know ahead of time we can't
    // phi translate this value, don't even try.
    if (!Pointer.IsPotentiallyPHITranslatable())
      goto PredTranslationFailure;
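    // PHI translation rewrites the queried pointer into each predecessor:
    // for example, a GEP whose base is a PHI node becomes the GEP of the
    // incoming value on that edge. When no such predecessor value can be
    // built, the code bails out through PredTranslationFailure.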
    // We may have added values to the cache list before this PHI
    // translation. If so, sort it now so that recursive invocations of
    // getNonLocalPointerDepFromBB only see properly sorted cache arrays.
    if (Cache && NumSortedEntries != Cache->size()) {
      SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
      NumSortedEntries = Cache->size();
    }
    Cache = nullptr;

    PredList.clear();
    for (BasicBlock *Pred : PredCache.get(BB)) {
      PredList.push_back(std::make_pair(Pred, Pointer));

      // Get the PHI translated pointer in this predecessor. This can fail if
      // not translatable, in which case getAddr() returns null.
      PHITransAddr &PredPointer = PredList.back().second;
      PredPointer.PHITranslateValue(BB, Pred, &DT, /*MustDominate=*/false);
      Value *PredPtrVal = PredPointer.getAddr();

      // Check to see if we have already visited this pred block with another
      // pointer. If so, we can't do this lookup. This failure can occur with
      // PHI translation when a critical edge exists and the PHI node in the
      // successor translates to a pointer value different than the pointer
      // the block was first analyzed with.
      std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
          Visited.insert(std::make_pair(Pred, PredPtrVal));

      if (!InsertRes.second) {
        // We found the pred; take it off the list of preds to visit.
        PredList.pop_back();

        // If the predecessor was visited with PredPtr, then we already did
        // the analysis and can ignore it.
        if (InsertRes.first->second == PredPtrVal)
          continue;

        // Otherwise, the block was previously analyzed with a different
        // pointer. Treat this as a phi translation failure, cleaning up the
        // Visited map first.
        for (unsigned i = 0, n = PredList.size(); i < n; ++i)
          Visited.erase(PredList[i].first);
        goto PredTranslationFailure;
      }
    }
    // Actually process results here; doing this in a separate loop avoids
    // calling getNonLocalPointerDepFromBB for blocks we don't want to return
    // results for (it would modify our data structures in ways the code
    // after the PredTranslationFailure label doesn't expect).
    for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
      BasicBlock *Pred = PredList[i].first;
      PHITransAddr &PredPointer = PredList[i].second;
      Value *PredPtrVal = PredPointer.getAddr();

      bool CanTranslate = true;
      // If PHI translation was unable to find an available pointer in this
      // predecessor, the pointer must be assumed clobbered there.
      if (!PredPtrVal)
        CanTranslate = false;
      // ...
      if (!CanTranslate ||
          !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
                                       Loc.getWithNewPtr(PredPtrVal), isLoad,
                                       Pred, Result, Visited)) {
        // ...
        // Since we had a phi translation failure, the cache for CacheKey
        // won't include all of the entries that we need to immediately
        // satisfy future queries. Mark this by clearing the fast-path pair.
        NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
        NLPI.Pair = BBSkipFirstBlockPair();
        continue;
      }
    }

    // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
    CacheInfo = &NonLocalPointerDeps[CacheKey];
    Cache = &CacheInfo->NonLocalDeps;
    NumSortedEntries = Cache->size();

    // Since we did phi translation, the "Cache" set won't contain all of the
    // results for the query. Clear the fast-path indicator.
    CacheInfo->Pair = BBSkipFirstBlockPair();
    SkipFirstBlock = false;
    continue;
  PredTranslationFailure:
    // The following code is "failure"; we can't produce a sane translation
    // for the given block. Restore a consistent state before continuing.
    if (!Cache) {
      // Refresh the CacheInfo/Cache pointer if it got invalidated.
      CacheInfo = &NonLocalPointerDeps[CacheKey];
      Cache = &CacheInfo->NonLocalDeps;
      NumSortedEntries = Cache->size();
    }

    // Since we failed phi translation, the "Cache" set won't contain all of
    // the results for the query. Clear the fast-path indicator.
    CacheInfo->Pair = BBSkipFirstBlockPair();
    // ...
    // Results of invariant loads are not cached, so there is no cached
    // information to update.
    if (!isInvariantLoad) {
      for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
        if (I.getBB() != BB)
          continue;

        assert((GotWorklistLimit || I.getResult().isNonLocal() ||
                !DT.isReachableFromEntry(BB)) &&
               "Should only be here with transparent block");
        // ...
      }
    }
    (void)GotWorklistLimit;
  }

  // Okay, we're done now. If we added new values to the cache, re-sort it.
  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
  return true;
}
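// Design note: the PredTranslationFailure label centralizes the cleanup that
// every failure path needs (refreshing the possibly invalidated Cache
// pointers, clearing the fast-path pair, and downgrading the block's entry
// to "unknown"), so that later queries never trust a half-computed result.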
void MemoryDependenceResults::removeCachedNonLocalPointerDependencies(
    ValueIsLoadPair P) {
  // Most of the time this cache is empty.
  if (!NonLocalDefsCache.empty()) {
    auto it = NonLocalDefsCache.find(P.getPointer());
    if (it != NonLocalDefsCache.end()) {
      RemoveFromReverseMap(ReverseNonLocalDefsCache,
                           it->second.getResult().getInst(), P.getPointer());
      NonLocalDefsCache.erase(it);
    }

    if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
      auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
      if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
        for (const auto *entry : toRemoveIt->second)
          NonLocalDefsCache.erase(entry);
        ReverseNonLocalDefsCache.erase(toRemoveIt);
      }
    }
  }

  CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
  if (It == NonLocalPointerDeps.end())
    return;

  // Remove all of the entries in the BB->val map. This involves removing
  // instructions from the reverse map.
  // ...
  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
  NonLocalPointerDeps.erase(It);
}
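// Callers remove a pointer under both keys, since separate results are
// cached for load and store queries on the same pointer; see the paired
// ValueIsLoadPair(RemInst, false/true) calls in removeInstruction() below.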
void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
  // Walk through the non-local dependencies, removing this one as the value
  // of any cached queries.
  NonLocalDepMapType::iterator NLDI = NonLocalDepsMap.find(RemInst);
  if (NLDI != NonLocalDepsMap.end()) {
    NonLocalDepInfo &BlockMap = NLDI->second.first;
    for (auto &Entry : BlockMap)
      if (Instruction *Inst = Entry.getResult().getInst())
        RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
    NonLocalDepsMap.erase(NLDI);
  }

  // If we have a cached local dependence query for this instruction, remove
  // it.
  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
  if (LocalDepEntry != LocalDeps.end()) {
    // Remove us from DepInst's reverse set now that the local dep info is
    // gone.
    if (Instruction *Inst = LocalDepEntry->second.getInst())
      RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);

    // Remove this local dependency info.
    LocalDeps.erase(LocalDepEntry);
  }

  // If we have any cached dependencies on this instruction, remove them now.
  // If the instruction is a pointer, remove it from both the load and the
  // store info.
  if (RemInst->getType()->isPointerTy()) {
    removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
    removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
  } else {
    // Otherwise, if the instruction is in the map directly, it must be a
    // load. Remove it.
    auto toRemoveIt = NonLocalDefsCache.find(RemInst);
    if (toRemoveIt != NonLocalDefsCache.end()) {
      assert(isa<LoadInst>(RemInst) &&
             "only load instructions should be added directly");
      const Instruction *DepV = toRemoveIt->second.getResult().getInst();
      ReverseNonLocalDefsCache.find(DepV)->second.erase(RemInst);
      NonLocalDefsCache.erase(toRemoveIt);
    }
  }
  // Loop over all of the things that depend on the instruction we're
  // removing. If we find RemInst as a clobber or Def in any of the maps for
  // other values, replace its entry with a dirty version of the instruction
  // after it; this saves having to rescan the entire block. Terminators get
  // a null dirty value.
  SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;

  MemDepResult NewDirtyVal;
  if (!RemInst->isTerminator())
    NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());

  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
  if (ReverseDepIt != ReverseLocalDeps.end()) {
    // RemInst can't be the terminator if it has local stuff depending on it.
    assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
           "Nothing can locally depend on a terminator");

    for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
      assert(InstDependingOnRemInst != RemInst &&
             "Already removed our local dep info");

      LocalDeps[InstDependingOnRemInst] = NewDirtyVal;

      // Make sure to remember that new things depend on NewDepInst.
      assert(NewDirtyVal.getInst() &&
             "There is no way something else can have "
             "a local dep on this if it is a terminator!");
      ReverseDepsToAdd.push_back(
          std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
    }

    ReverseLocalDeps.erase(ReverseDepIt);

    // Add new reverse deps after scanning the set, to avoid invalidating the
    // 'ReverseDeps' reference.
    while (!ReverseDepsToAdd.empty()) {
      ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
          ReverseDepsToAdd.back().second);
      ReverseDepsToAdd.pop_back();
    }
  }
  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
    for (Instruction *I : ReverseDepIt->second) {
      assert(I != RemInst && "Already removed NonLocalDep info for RemInst");

      PerInstNLInfo &INLD = NonLocalDepsMap[I];
      // The information is now dirty!
      INLD.second = true;

      for (auto &Entry : INLD.first) {
        if (Entry.getResult().getInst() != RemInst)
          continue;

        // Convert to a dirty entry for the subsequent instruction.
        Entry.setResult(NewDirtyVal);

        if (Instruction *NextI = NewDirtyVal.getInst())
          ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
      }
    }

    ReverseNonLocalDeps.erase(ReverseDepIt);

    // Add new reverse deps after scanning the set, to avoid invalidating
    // the 'ReverseDeps' reference.
    while (!ReverseDepsToAdd.empty()) {
      ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
          ReverseDepsToAdd.back().second);
      ReverseDepsToAdd.pop_back();
    }
  }
  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
  // value in the NonLocalPointerDeps info.
  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
      ReverseNonLocalPtrDeps.find(RemInst);
  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
    SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
        ReversePtrDepsToAdd;

    for (ValueIsLoadPair P : ReversePtrDepIt->second) {
      assert(P.getPointer() != RemInst &&
             "Already removed NonLocalPointerDeps info for RemInst");

      NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;

      // The cache is not valid for any specific block anymore.
      NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();

      // Update any entries for RemInst to use the instruction after it.
      for (auto &Entry : NLPDI) {
        if (Entry.getResult().getInst() != RemInst)
          continue;

        // Convert to a dirty entry for the subsequent instruction.
        Entry.setResult(NewDirtyVal);

        if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
          ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
      }
      // ...
    }

    ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);

    while (!ReversePtrDepsToAdd.empty()) {
      ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
          ReversePtrDepsToAdd.back().second);
      ReversePtrDepsToAdd.pop_back();
    }
  }

  assert(!NonLocalDepsMap.count(RemInst) && "RemInst got reinserted?");
  LLVM_DEBUG(verifyRemoved(RemInst));
}
/// Verify that the specified instruction does not occur in our internal data
/// structures.
void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
  for (const auto &DepKV : LocalDeps) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
  }

  for (const auto &DepKV : NonLocalPointerDeps) {
    assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
    for (const auto &Entry : DepKV.second.NonLocalDeps)
      assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
  }

  for (const auto &DepKV : NonLocalDepsMap) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    const PerInstNLInfo &INLD = DepKV.second;
    for (const auto &Entry : INLD.first)
      assert(Entry.getResult().getInst() != D &&
             "Inst occurs in data structures");
  }

  for (const auto &DepKV : ReverseLocalDeps) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    for (Instruction *Inst : DepKV.second)
      assert(Inst != D && "Inst occurs in data structures");
  }

  for (const auto &DepKV : ReverseNonLocalDeps) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    for (Instruction *Inst : DepKV.second)
      assert(Inst != D && "Inst occurs in data structures");
  }

  for (const auto &DepKV : ReverseNonLocalPtrDeps) {
    assert(DepKV.first != D && "Inst occurs in rev NLPD map");

    for (ValueIsLoadPair P : DepKV.second)
      assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
             "Inst occurs in ReverseNonLocalPtrDeps map");
  }
}
1743 "Memory Dependence Analysis",
false,
true)
1791 return DefaultBlockScanLimit;
bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult();
  MemDep.emplace(AA, AC, TLI, DT, PV, BlockScanLimit);
  return false;
}
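// The wrapper pass simply gathers the analyses MemoryDependenceResults
// consumes (alias analysis, the assumption cache, TLI, the dominator tree,
// and phi values) and constructs the result object; the analysis itself is
// lazy and does no work until the first query.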