LLVM 19.0.0git
MemoryDependenceAnalysis.cpp
Go to the documentation of this file.
1//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements an analysis that determines, for a given memory
10// operation, what preceding memory operations it depends on. It builds on
11// alias analysis information, and tries to provide a lazy, caching interface to
12// a common kind of alias information query.
13//
14//===----------------------------------------------------------------------===//
15
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/InstrTypes.h"
33#include "llvm/IR/Instruction.h"
36#include "llvm/IR/LLVMContext.h"
37#include "llvm/IR/Metadata.h"
38#include "llvm/IR/Module.h"
40#include "llvm/IR/Type.h"
41#include "llvm/IR/Use.h"
42#include "llvm/IR/Value.h"
44#include "llvm/Pass.h"
49#include "llvm/Support/Debug.h"
50#include <algorithm>
51#include <cassert>
52#include <iterator>
53#include <utility>
54
55using namespace llvm;
56
57#define DEBUG_TYPE "memdep"
58
59STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
60STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
61STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
62
63STATISTIC(NumCacheNonLocalPtr,
64 "Number of fully cached non-local ptr responses");
65STATISTIC(NumCacheDirtyNonLocalPtr,
66 "Number of cached, but dirty, non-local ptr responses");
67STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
68STATISTIC(NumCacheCompleteNonLocalPtr,
69 "Number of block queries that were completely cached");
70
71// Limit for the number of instructions to scan in a block.
72
74 "memdep-block-scan-limit", cl::Hidden, cl::init(100),
75 cl::desc("The number of instructions to scan in a block in memory "
76 "dependency analysis (default = 100)"));
77
79 BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(200),
80 cl::desc("The number of blocks to scan during memory "
81 "dependency analysis (default = 200)"));
82
83// Limit on the number of memdep results to process.
84static const unsigned int NumResultsLimit = 100;
85
86/// This is a helper function that removes Val from 'Inst's set in ReverseMap.
87///
88/// If the set becomes empty, remove Inst's entry.
89template <typename KeyTy>
90static void
92 Instruction *Inst, KeyTy Val) {
93 typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
94 ReverseMap.find(Inst);
95 assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
96 bool Found = InstIt->second.erase(Val);
97 assert(Found && "Invalid reverse map!");
98 (void)Found;
99 if (InstIt->second.empty())
100 ReverseMap.erase(InstIt);
101}
102
103/// If the given instruction references a specific memory location, fill in Loc
104/// with the details, otherwise set Loc.Ptr to null.
105///
106/// Returns a ModRefInfo value describing the general behavior of the
107/// instruction.
109 const TargetLibraryInfo &TLI) {
110 if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
111 if (LI->isUnordered()) {
112 Loc = MemoryLocation::get(LI);
113 return ModRefInfo::Ref;
114 }
115 if (LI->getOrdering() == AtomicOrdering::Monotonic) {
116 Loc = MemoryLocation::get(LI);
117 return ModRefInfo::ModRef;
118 }
119 Loc = MemoryLocation();
120 return ModRefInfo::ModRef;
121 }
122
123 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
124 if (SI->isUnordered()) {
125 Loc = MemoryLocation::get(SI);
126 return ModRefInfo::Mod;
127 }
128 if (SI->getOrdering() == AtomicOrdering::Monotonic) {
129 Loc = MemoryLocation::get(SI);
130 return ModRefInfo::ModRef;
131 }
132 Loc = MemoryLocation();
133 return ModRefInfo::ModRef;
134 }
135
136 if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
137 Loc = MemoryLocation::get(V);
138 return ModRefInfo::ModRef;
139 }
140
141 if (const CallBase *CB = dyn_cast<CallBase>(Inst)) {
142 if (Value *FreedOp = getFreedOperand(CB, &TLI)) {
143 // calls to free() deallocate the entire structure
144 Loc = MemoryLocation::getAfter(FreedOp);
145 return ModRefInfo::Mod;
146 }
147 }
148
149 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
150 switch (II->getIntrinsicID()) {
151 case Intrinsic::lifetime_start:
152 case Intrinsic::lifetime_end:
153 case Intrinsic::invariant_start:
154 Loc = MemoryLocation::getForArgument(II, 1, TLI);
155 // These intrinsics don't really modify the memory, but returning Mod
156 // will allow them to be handled conservatively.
157 return ModRefInfo::Mod;
158 case Intrinsic::invariant_end:
159 Loc = MemoryLocation::getForArgument(II, 2, TLI);
160 // These intrinsics don't really modify the memory, but returning Mod
161 // will allow them to be handled conservatively.
162 return ModRefInfo::Mod;
163 case Intrinsic::masked_load:
164 Loc = MemoryLocation::getForArgument(II, 0, TLI);
165 return ModRefInfo::Ref;
166 case Intrinsic::masked_store:
167 Loc = MemoryLocation::getForArgument(II, 1, TLI);
168 return ModRefInfo::Mod;
169 default:
170 break;
171 }
172 }
173
174 // Otherwise, just do the coarse-grained thing that always works.
175 if (Inst->mayWriteToMemory())
176 return ModRefInfo::ModRef;
177 if (Inst->mayReadFromMemory())
178 return ModRefInfo::Ref;
179 return ModRefInfo::NoModRef;
180}
181
182/// Private helper for finding the local dependencies of a call site.
183MemDepResult MemoryDependenceResults::getCallDependencyFrom(
184 CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
185 BasicBlock *BB) {
186 unsigned Limit = getDefaultBlockScanLimit();
187
188 // Walk backwards through the block, looking for dependencies.
189 while (ScanIt != BB->begin()) {
190 Instruction *Inst = &*--ScanIt;
191 // Debug intrinsics don't cause dependences and should not affect Limit
192 if (isa<DbgInfoIntrinsic>(Inst))
193 continue;
194
195 // Limit the amount of scanning we do so we don't end up with quadratic
196 // running time on extreme testcases.
197 --Limit;
198 if (!Limit)
200
201 // If this inst is a memory op, get the pointer it accessed
202 MemoryLocation Loc;
203 ModRefInfo MR = GetLocation(Inst, Loc, TLI);
204 if (Loc.Ptr) {
205 // A simple instruction.
206 if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
207 return MemDepResult::getClobber(Inst);
208 continue;
209 }
210
211 if (auto *CallB = dyn_cast<CallBase>(Inst)) {
212 // If these two calls do not interfere, look past it.
213 if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
214 // If the two calls are the same, return Inst as a Def, so that
215 // Call can be found redundant and eliminated.
216 if (isReadOnlyCall && !isModSet(MR) &&
217 Call->isIdenticalToWhenDefined(CallB))
218 return MemDepResult::getDef(Inst);
219
220 // Otherwise if the two calls don't interact (e.g. CallB is readnone)
221 // keep scanning.
222 continue;
223 } else
224 return MemDepResult::getClobber(Inst);
225 }
226
227 // If we could not obtain a pointer for the instruction and the instruction
228 // touches memory then assume that this is a dependency.
229 if (isModOrRefSet(MR))
230 return MemDepResult::getClobber(Inst);
231 }
232
233 // No dependence found. If this is the entry block of the function, it is
234 // unknown, otherwise it is non-local.
235 if (BB != &BB->getParent()->getEntryBlock())
238}
239
241 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
242 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
243 BatchAAResults &BatchAA) {
244 MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
245 if (QueryInst != nullptr) {
246 if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
247 InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
248
249 if (InvariantGroupDependency.isDef())
250 return InvariantGroupDependency;
251 }
252 }
254 MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, BatchAA);
255 if (SimpleDep.isDef())
256 return SimpleDep;
257 // Non-local invariant group dependency indicates there is non local Def
258 // (it only returns nonLocal if it finds nonLocal def), which is better than
259 // local clobber and everything else.
260 if (InvariantGroupDependency.isNonLocal())
261 return InvariantGroupDependency;
262
263 assert(InvariantGroupDependency.isUnknown() &&
264 "InvariantGroupDependency should be only unknown at this point");
265 return SimpleDep;
266}
267
269 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
270 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
271 BatchAAResults BatchAA(AA, &EII);
272 return getPointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, Limit,
273 BatchAA);
274}
275
278 BasicBlock *BB) {
279
280 if (!LI->hasMetadata(LLVMContext::MD_invariant_group))
282
283 // Take the ptr operand after all casts and geps 0. This way we can search
284 // cast graph down only.
285 Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
286
287 // It's is not safe to walk the use list of global value, because function
288 // passes aren't allowed to look outside their functions.
289 // FIXME: this could be fixed by filtering instructions from outside
290 // of current function.
291 if (isa<GlobalValue>(LoadOperand))
293
294 // Queue to process all pointers that are equivalent to load operand.
295 SmallVector<const Value *, 8> LoadOperandsQueue;
296 LoadOperandsQueue.push_back(LoadOperand);
297
298 Instruction *ClosestDependency = nullptr;
299 // Order of instructions in uses list is unpredictible. In order to always
300 // get the same result, we will look for the closest dominance.
301 auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
302 assert(Other && "Must call it with not null instruction");
303 if (Best == nullptr || DT.dominates(Best, Other))
304 return Other;
305 return Best;
306 };
307
308 // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
309 // we will see all the instructions. This should be fixed in MSSA.
310 while (!LoadOperandsQueue.empty()) {
311 const Value *Ptr = LoadOperandsQueue.pop_back_val();
312 assert(Ptr && !isa<GlobalValue>(Ptr) &&
313 "Null or GlobalValue should not be inserted");
314
315 for (const Use &Us : Ptr->uses()) {
316 auto *U = dyn_cast<Instruction>(Us.getUser());
317 if (!U || U == LI || !DT.dominates(U, LI))
318 continue;
319
320 // Bitcast or gep with zeros are using Ptr. Add to queue to check it's
321 // users. U = bitcast Ptr
322 if (isa<BitCastInst>(U)) {
323 LoadOperandsQueue.push_back(U);
324 continue;
325 }
326 // Gep with zeros is equivalent to bitcast.
327 // FIXME: we are not sure if some bitcast should be canonicalized to gep 0
328 // or gep 0 to bitcast because of SROA, so there are 2 forms. When
329 // typeless pointers will be ready then both cases will be gone
330 // (and this BFS also won't be needed).
331 if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
332 if (GEP->hasAllZeroIndices()) {
333 LoadOperandsQueue.push_back(U);
334 continue;
335 }
336
337 // If we hit load/store with the same invariant.group metadata (and the
338 // same pointer operand) we can assume that value pointed by pointer
339 // operand didn't change.
340 if ((isa<LoadInst>(U) ||
341 (isa<StoreInst>(U) &&
342 cast<StoreInst>(U)->getPointerOperand() == Ptr)) &&
343 U->hasMetadata(LLVMContext::MD_invariant_group))
344 ClosestDependency = GetClosestDependency(ClosestDependency, U);
345 }
346 }
347
348 if (!ClosestDependency)
350 if (ClosestDependency->getParent() == BB)
351 return MemDepResult::getDef(ClosestDependency);
352 // Def(U) can't be returned here because it is non-local. If local
353 // dependency won't be found then return nonLocal counting that the
354 // user will call getNonLocalPointerDependency, which will return cached
355 // result.
356 NonLocalDefsCache.try_emplace(
357 LI, NonLocalDepResult(ClosestDependency->getParent(),
358 MemDepResult::getDef(ClosestDependency), nullptr));
359 ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
361}
362
363// Check if SI that may alias with MemLoc can be safely skipped. This is
364// possible in case if SI can only must alias or no alias with MemLoc (no
365// partial overlapping possible) and it writes the same value that MemLoc
366// contains now (it was loaded before this store and was not modified in
367// between).
368static bool canSkipClobberingStore(const StoreInst *SI,
369 const MemoryLocation &MemLoc,
370 Align MemLocAlign, BatchAAResults &BatchAA,
371 unsigned ScanLimit) {
372 if (!MemLoc.Size.hasValue())
373 return false;
374 if (MemoryLocation::get(SI).Size != MemLoc.Size)
375 return false;
376 if (MemLoc.Size.isScalable())
377 return false;
378 if (std::min(MemLocAlign, SI->getAlign()).value() <
379 MemLoc.Size.getValue().getKnownMinValue())
380 return false;
381
382 auto *LI = dyn_cast<LoadInst>(SI->getValueOperand());
383 if (!LI || LI->getParent() != SI->getParent())
384 return false;
385 if (BatchAA.alias(MemoryLocation::get(LI), MemLoc) != AliasResult::MustAlias)
386 return false;
387 unsigned NumVisitedInsts = 0;
388 for (const Instruction *I = LI; I != SI; I = I->getNextNonDebugInstruction())
389 if (++NumVisitedInsts > ScanLimit ||
390 isModSet(BatchAA.getModRefInfo(I, MemLoc)))
391 return false;
392
393 return true;
394}
395
397 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
398 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
399 BatchAAResults &BatchAA) {
400 bool isInvariantLoad = false;
401 Align MemLocAlign =
403
404 unsigned DefaultLimit = getDefaultBlockScanLimit();
405 if (!Limit)
406 Limit = &DefaultLimit;
407
408 // We must be careful with atomic accesses, as they may allow another thread
409 // to touch this location, clobbering it. We are conservative: if the
410 // QueryInst is not a simple (non-atomic) memory access, we automatically
411 // return getClobber.
412 // If it is simple, we know based on the results of
413 // "Compiler testing via a theory of sound optimisations in the C11/C++11
414 // memory model" in PLDI 2013, that a non-atomic location can only be
415 // clobbered between a pair of a release and an acquire action, with no
416 // access to the location in between.
417 // Here is an example for giving the general intuition behind this rule.
418 // In the following code:
419 // store x 0;
420 // release action; [1]
421 // acquire action; [4]
422 // %val = load x;
423 // It is unsafe to replace %val by 0 because another thread may be running:
424 // acquire action; [2]
425 // store x 42;
426 // release action; [3]
427 // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
428 // being 42. A key property of this program however is that if either
429 // 1 or 4 were missing, there would be a race between the store of 42
430 // either the store of 0 or the load (making the whole program racy).
431 // The paper mentioned above shows that the same property is respected
432 // by every program that can detect any optimization of that kind: either
433 // it is racy (undefined) or there is a release followed by an acquire
434 // between the pair of accesses under consideration.
435
436 // If the load is invariant, we "know" that it doesn't alias *any* write. We
437 // do want to respect mustalias results since defs are useful for value
438 // forwarding, but any mayalias write can be assumed to be noalias.
439 // Arguably, this logic should be pushed inside AliasAnalysis itself.
440 if (isLoad && QueryInst)
441 if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
442 if (LI->hasMetadata(LLVMContext::MD_invariant_load))
443 isInvariantLoad = true;
444 MemLocAlign = LI->getAlign();
445 }
446
447 // True for volatile instruction.
448 // For Load/Store return true if atomic ordering is stronger than AO,
449 // for other instruction just true if it can read or write to memory.
450 auto isComplexForReordering = [](Instruction * I, AtomicOrdering AO)->bool {
451 if (I->isVolatile())
452 return true;
453 if (auto *LI = dyn_cast<LoadInst>(I))
454 return isStrongerThan(LI->getOrdering(), AO);
455 if (auto *SI = dyn_cast<StoreInst>(I))
456 return isStrongerThan(SI->getOrdering(), AO);
457 return I->mayReadOrWriteMemory();
458 };
459
460 // Walk backwards through the basic block, looking for dependencies.
461 while (ScanIt != BB->begin()) {
462 Instruction *Inst = &*--ScanIt;
463
464 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
465 // Debug intrinsics don't (and can't) cause dependencies.
466 if (isa<DbgInfoIntrinsic>(II))
467 continue;
468
469 // Limit the amount of scanning we do so we don't end up with quadratic
470 // running time on extreme testcases.
471 --*Limit;
472 if (!*Limit)
474
475 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
476 // If we reach a lifetime begin or end marker, then the query ends here
477 // because the value is undefined.
478 Intrinsic::ID ID = II->getIntrinsicID();
479 switch (ID) {
480 case Intrinsic::lifetime_start: {
481 // FIXME: This only considers queries directly on the invariant-tagged
482 // pointer, not on query pointers that are indexed off of them. It'd
483 // be nice to handle that at some point (the right approach is to use
484 // GetPointerBaseWithConstantOffset).
485 MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1));
486 if (BatchAA.isMustAlias(ArgLoc, MemLoc))
487 return MemDepResult::getDef(II);
488 continue;
489 }
490 case Intrinsic::masked_load:
491 case Intrinsic::masked_store: {
492 MemoryLocation Loc;
493 /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
494 AliasResult R = BatchAA.alias(Loc, MemLoc);
495 if (R == AliasResult::NoAlias)
496 continue;
497 if (R == AliasResult::MustAlias)
498 return MemDepResult::getDef(II);
499 if (ID == Intrinsic::masked_load)
500 continue;
502 }
503 }
504 }
505
506 // Values depend on loads if the pointers are must aliased. This means
507 // that a load depends on another must aliased load from the same value.
508 // One exception is atomic loads: a value can depend on an atomic load that
509 // it does not alias with when this atomic load indicates that another
510 // thread may be accessing the location.
511 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
512 // While volatile access cannot be eliminated, they do not have to clobber
513 // non-aliasing locations, as normal accesses, for example, can be safely
514 // reordered with volatile accesses.
515 if (LI->isVolatile()) {
516 if (!QueryInst)
517 // Original QueryInst *may* be volatile
518 return MemDepResult::getClobber(LI);
519 if (QueryInst->isVolatile())
520 // Ordering required if QueryInst is itself volatile
521 return MemDepResult::getClobber(LI);
522 // Otherwise, volatile doesn't imply any special ordering
523 }
524
525 // Atomic loads have complications involved.
526 // A Monotonic (or higher) load is OK if the query inst is itself not
527 // atomic.
528 // FIXME: This is overly conservative.
529 if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
530 if (!QueryInst ||
531 isComplexForReordering(QueryInst, AtomicOrdering::NotAtomic))
532 return MemDepResult::getClobber(LI);
533 if (LI->getOrdering() != AtomicOrdering::Monotonic)
534 return MemDepResult::getClobber(LI);
535 }
536
538
539 // If we found a pointer, check if it could be the same as our pointer.
540 AliasResult R = BatchAA.alias(LoadLoc, MemLoc);
541
542 if (R == AliasResult::NoAlias)
543 continue;
544
545 if (isLoad) {
546 // Must aliased loads are defs of each other.
547 if (R == AliasResult::MustAlias)
548 return MemDepResult::getDef(Inst);
549
550 // If we have a partial alias, then return this as a clobber for the
551 // client to handle.
552 if (R == AliasResult::PartialAlias && R.hasOffset()) {
553 ClobberOffsets[LI] = R.getOffset();
554 return MemDepResult::getClobber(Inst);
555 }
556
557 // Random may-alias loads don't depend on each other without a
558 // dependence.
559 continue;
560 }
561
562 // Stores don't alias loads from read-only memory.
563 if (!isModSet(BatchAA.getModRefInfoMask(LoadLoc)))
564 continue;
565
566 // Stores depend on may/must aliased loads.
567 return MemDepResult::getDef(Inst);
568 }
569
570 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
571 // Atomic stores have complications involved.
572 // A Monotonic store is OK if the query inst is itself not atomic.
573 // FIXME: This is overly conservative.
574 if (!SI->isUnordered() && SI->isAtomic()) {
575 if (!QueryInst ||
576 isComplexForReordering(QueryInst, AtomicOrdering::Unordered))
577 return MemDepResult::getClobber(SI);
578 // Ok, if we are here the guard above guarantee us that
579 // QueryInst is a non-atomic or unordered load/store.
580 // SI is atomic with monotonic or release semantic (seq_cst for store
581 // is actually a release semantic plus total order over other seq_cst
582 // instructions, as soon as QueryInst is not seq_cst we can consider it
583 // as simple release semantic).
584 // Monotonic and Release semantic allows re-ordering before store
585 // so we are safe to go further and check the aliasing. It will prohibit
586 // re-ordering in case locations are may or must alias.
587 }
588
589 // While volatile access cannot be eliminated, they do not have to clobber
590 // non-aliasing locations, as normal accesses can for example be reordered
591 // with volatile accesses.
592 if (SI->isVolatile())
593 if (!QueryInst || QueryInst->isVolatile())
594 return MemDepResult::getClobber(SI);
595
596 // If alias analysis can tell that this store is guaranteed to not modify
597 // the query pointer, ignore it. Use getModRefInfo to handle cases where
598 // the query pointer points to constant memory etc.
599 if (!isModOrRefSet(BatchAA.getModRefInfo(SI, MemLoc)))
600 continue;
601
602 // Ok, this store might clobber the query pointer. Check to see if it is
603 // a must alias: in this case, we want to return this as a def.
604 // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
605 MemoryLocation StoreLoc = MemoryLocation::get(SI);
606
607 // If we found a pointer, check if it could be the same as our pointer.
608 AliasResult R = BatchAA.alias(StoreLoc, MemLoc);
609
610 if (R == AliasResult::NoAlias)
611 continue;
612 if (R == AliasResult::MustAlias)
613 return MemDepResult::getDef(Inst);
614 if (isInvariantLoad)
615 continue;
616 if (canSkipClobberingStore(SI, MemLoc, MemLocAlign, BatchAA, *Limit))
617 continue;
618 return MemDepResult::getClobber(Inst);
619 }
620
621 // If this is an allocation, and if we know that the accessed pointer is to
622 // the allocation, return Def. This means that there is no dependence and
623 // the access can be optimized based on that. For example, a load could
624 // turn into undef. Note that we can bypass the allocation itself when
625 // looking for a clobber in many cases; that's an alias property and is
626 // handled by BasicAA.
627 if (isa<AllocaInst>(Inst) || isNoAliasCall(Inst)) {
628 const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr);
629 if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr))
630 return MemDepResult::getDef(Inst);
631 }
632
633 // If we found a select instruction for MemLoc pointer, return it as Def
634 // dependency.
635 if (isa<SelectInst>(Inst) && MemLoc.Ptr == Inst)
636 return MemDepResult::getDef(Inst);
637
638 if (isInvariantLoad)
639 continue;
640
641 // A release fence requires that all stores complete before it, but does
642 // not prevent the reordering of following loads or stores 'before' the
643 // fence. As a result, we look past it when finding a dependency for
644 // loads. DSE uses this to find preceding stores to delete and thus we
645 // can't bypass the fence if the query instruction is a store.
646 if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
647 if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
648 continue;
649
650 // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
651 switch (BatchAA.getModRefInfo(Inst, MemLoc)) {
653 // If the call has no effect on the queried pointer, just ignore it.
654 continue;
655 case ModRefInfo::Mod:
656 return MemDepResult::getClobber(Inst);
657 case ModRefInfo::Ref:
658 // If the call is known to never store to the pointer, and if this is a
659 // load query, we can safely ignore it (scan past it).
660 if (isLoad)
661 continue;
662 [[fallthrough]];
663 default:
664 // Otherwise, there is a potential dependence. Return a clobber.
665 return MemDepResult::getClobber(Inst);
666 }
667 }
668
669 // No dependence found. If this is the entry block of the function, it is
670 // unknown, otherwise it is non-local.
671 if (BB != &BB->getParent()->getEntryBlock())
674}
675
677 ClobberOffsets.clear();
678 Instruction *ScanPos = QueryInst;
679
680 // Check for a cached result
681 MemDepResult &LocalCache = LocalDeps[QueryInst];
682
683 // If the cached entry is non-dirty, just return it. Note that this depends
684 // on MemDepResult's default constructing to 'dirty'.
685 if (!LocalCache.isDirty())
686 return LocalCache;
687
688 // Otherwise, if we have a dirty entry, we know we can start the scan at that
689 // instruction, which may save us some work.
690 if (Instruction *Inst = LocalCache.getInst()) {
691 ScanPos = Inst;
692
693 RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
694 }
695
696 BasicBlock *QueryParent = QueryInst->getParent();
697
698 // Do the scan.
699 if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
700 // No dependence found. If this is the entry block of the function, it is
701 // unknown, otherwise it is non-local.
702 if (QueryParent != &QueryParent->getParent()->getEntryBlock())
703 LocalCache = MemDepResult::getNonLocal();
704 else
705 LocalCache = MemDepResult::getNonFuncLocal();
706 } else {
707 MemoryLocation MemLoc;
708 ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
709 if (MemLoc.Ptr) {
710 // If we can do a pointer scan, make it happen.
711 bool isLoad = !isModSet(MR);
712 if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
713 isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
714
715 LocalCache =
716 getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
717 QueryParent, QueryInst, nullptr);
718 } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
719 bool isReadOnly = AA.onlyReadsMemory(QueryCall);
720 LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
721 ScanPos->getIterator(), QueryParent);
722 } else
723 // Non-memory instruction.
724 LocalCache = MemDepResult::getUnknown();
725 }
726
727 // Remember the result!
728 if (Instruction *I = LocalCache.getInst())
729 ReverseLocalDeps[I].insert(QueryInst);
730
731 return LocalCache;
732}
733
734#ifndef NDEBUG
735/// This method is used when -debug is specified to verify that cache arrays
736/// are properly kept sorted.
738 int Count = -1) {
739 if (Count == -1)
740 Count = Cache.size();
741 assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
742 "Cache isn't sorted!");
743}
744#endif
745
748 assert(getDependency(QueryCall).isNonLocal() &&
749 "getNonLocalCallDependency should only be used on calls with "
750 "non-local deps!");
751 PerInstNLInfo &CacheP = NonLocalDepsMap[QueryCall];
752 NonLocalDepInfo &Cache = CacheP.first;
753
754 // This is the set of blocks that need to be recomputed. In the cached case,
755 // this can happen due to instructions being deleted etc. In the uncached
756 // case, this starts out as the set of predecessors we care about.
758
759 if (!Cache.empty()) {
760 // Okay, we have a cache entry. If we know it is not dirty, just return it
761 // with no computation.
762 if (!CacheP.second) {
763 ++NumCacheNonLocal;
764 return Cache;
765 }
766
767 // If we already have a partially computed set of results, scan them to
768 // determine what is dirty, seeding our initial DirtyBlocks worklist.
769 for (auto &Entry : Cache)
770 if (Entry.getResult().isDirty())
771 DirtyBlocks.push_back(Entry.getBB());
772
773 // Sort the cache so that we can do fast binary search lookups below.
774 llvm::sort(Cache);
775
776 ++NumCacheDirtyNonLocal;
777 } else {
778 // Seed DirtyBlocks with each of the preds of QueryInst's block.
779 BasicBlock *QueryBB = QueryCall->getParent();
780 append_range(DirtyBlocks, PredCache.get(QueryBB));
781 ++NumUncacheNonLocal;
782 }
783
784 // isReadonlyCall - If this is a read-only call, we can be more aggressive.
785 bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);
786
788
789 unsigned NumSortedEntries = Cache.size();
790 LLVM_DEBUG(AssertSorted(Cache));
791
792 // Iterate while we still have blocks to update.
793 while (!DirtyBlocks.empty()) {
794 BasicBlock *DirtyBB = DirtyBlocks.pop_back_val();
795
796 // Already processed this block?
797 if (!Visited.insert(DirtyBB).second)
798 continue;
799
800 // Do a binary search to see if we already have an entry for this block in
801 // the cache set. If so, find it.
802 LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
803 NonLocalDepInfo::iterator Entry =
804 std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
805 NonLocalDepEntry(DirtyBB));
806 if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
807 --Entry;
808
809 NonLocalDepEntry *ExistingResult = nullptr;
810 if (Entry != Cache.begin() + NumSortedEntries &&
811 Entry->getBB() == DirtyBB) {
812 // If we already have an entry, and if it isn't already dirty, the block
813 // is done.
814 if (!Entry->getResult().isDirty())
815 continue;
816
817 // Otherwise, remember this slot so we can update the value.
818 ExistingResult = &*Entry;
819 }
820
821 // If the dirty entry has a pointer, start scanning from it so we don't have
822 // to rescan the entire block.
823 BasicBlock::iterator ScanPos = DirtyBB->end();
824 if (ExistingResult) {
825 if (Instruction *Inst = ExistingResult->getResult().getInst()) {
826 ScanPos = Inst->getIterator();
827 // We're removing QueryInst's use of Inst.
828 RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
829 QueryCall);
830 }
831 }
832
833 // Find out if this block has a local dependency for QueryInst.
834 MemDepResult Dep;
835
836 if (ScanPos != DirtyBB->begin()) {
837 Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
838 } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
839 // No dependence found. If this is the entry block of the function, it is
840 // a clobber, otherwise it is unknown.
842 } else {
844 }
845
846 // If we had a dirty entry for the block, update it. Otherwise, just add
847 // a new entry.
848 if (ExistingResult)
849 ExistingResult->setResult(Dep);
850 else
851 Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
852
853 // If the block has a dependency (i.e. it isn't completely transparent to
854 // the value), remember the association!
855 if (!Dep.isNonLocal()) {
856 // Keep the ReverseNonLocalDeps map up to date so we can efficiently
857 // update this when we remove instructions.
858 if (Instruction *Inst = Dep.getInst())
859 ReverseNonLocalDeps[Inst].insert(QueryCall);
860 } else {
861
862 // If the block *is* completely transparent to the load, we need to check
863 // the predecessors of this block. Add them to our worklist.
864 append_range(DirtyBlocks, PredCache.get(DirtyBB));
865 }
866 }
867
868 return Cache;
869}
870
873 const MemoryLocation Loc = MemoryLocation::get(QueryInst);
874 bool isLoad = isa<LoadInst>(QueryInst);
875 BasicBlock *FromBB = QueryInst->getParent();
876 assert(FromBB);
877
878 assert(Loc.Ptr->getType()->isPointerTy() &&
879 "Can't get pointer deps of a non-pointer!");
880 Result.clear();
881 {
882 // Check if there is cached Def with invariant.group.
883 auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
884 if (NonLocalDefIt != NonLocalDefsCache.end()) {
885 Result.push_back(NonLocalDefIt->second);
886 ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
887 .erase(QueryInst);
888 NonLocalDefsCache.erase(NonLocalDefIt);
889 return;
890 }
891 }
892 // This routine does not expect to deal with volatile instructions.
893 // Doing so would require piping through the QueryInst all the way through.
894 // TODO: volatiles can't be elided, but they can be reordered with other
895 // non-volatile accesses.
896
897 // We currently give up on any instruction which is ordered, but we do handle
898 // atomic instructions which are unordered.
899 // TODO: Handle ordered instructions
900 auto isOrdered = [](Instruction *Inst) {
901 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
902 return !LI->isUnordered();
903 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
904 return !SI->isUnordered();
905 }
906 return false;
907 };
908 if (QueryInst->isVolatile() || isOrdered(QueryInst)) {
909 Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
910 const_cast<Value *>(Loc.Ptr)));
911 return;
912 }
913 const DataLayout &DL = FromBB->getModule()->getDataLayout();
914 PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
915
916 // This is the set of blocks we've inspected, and the pointer we consider in
917 // each block. Because of critical edges, we currently bail out if querying
918 // a block with multiple different pointers. This can happen during PHI
919 // translation.
921 if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
922 Result, Visited, true))
923 return;
924 Result.clear();
925 Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
926 const_cast<Value *>(Loc.Ptr)));
927}
928
929/// Compute the memdep value for BB with Pointer/PointeeSize using either
930/// cached information in Cache or by doing a lookup (which may use dirty cache
931/// info if available).
932///
933/// If we do a lookup, add the result to the cache.
934MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
935 Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
936 BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries,
937 BatchAAResults &BatchAA) {
938
939 bool isInvariantLoad = false;
940
941 if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
942 isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
943
944 // Do a binary search to see if we already have an entry for this block in
945 // the cache set. If so, find it.
946 NonLocalDepInfo::iterator Entry = std::upper_bound(
947 Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
948 if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
949 --Entry;
950
951 NonLocalDepEntry *ExistingResult = nullptr;
952 if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
953 ExistingResult = &*Entry;
954
955 // Use cached result for invariant load only if there is no dependency for non
956 // invariant load. In this case invariant load can not have any dependency as
957 // well.
958 if (ExistingResult && isInvariantLoad &&
959 !ExistingResult->getResult().isNonFuncLocal())
960 ExistingResult = nullptr;
961
962 // If we have a cached entry, and it is non-dirty, use it as the value for
963 // this dependency.
964 if (ExistingResult && !ExistingResult->getResult().isDirty()) {
965 ++NumCacheNonLocalPtr;
966 return ExistingResult->getResult();
967 }
968
969 // Otherwise, we have to scan for the value. If we have a dirty cache
970 // entry, start scanning from its position, otherwise we scan from the end
971 // of the block.
972 BasicBlock::iterator ScanPos = BB->end();
973 if (ExistingResult && ExistingResult->getResult().getInst()) {
974 assert(ExistingResult->getResult().getInst()->getParent() == BB &&
975 "Instruction invalidated?");
976 ++NumCacheDirtyNonLocalPtr;
977 ScanPos = ExistingResult->getResult().getInst()->getIterator();
978
979 // Eliminating the dirty entry from 'Cache', so update the reverse info.
980 ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
981 RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
982 } else {
983 ++NumUncacheNonLocalPtr;
984 }
985
986 // Scan the block for the dependency.
987 MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
988 QueryInst, nullptr, BatchAA);
989
990 // Don't cache results for invariant load.
991 if (isInvariantLoad)
992 return Dep;
993
994 // If we had a dirty entry for the block, update it. Otherwise, just add
995 // a new entry.
996 if (ExistingResult)
997 ExistingResult->setResult(Dep);
998 else
999 Cache->push_back(NonLocalDepEntry(BB, Dep));
1000
1001 // If the block has a dependency (i.e. it isn't completely transparent to
1002 // the value), remember the reverse association because we just added it
1003 // to Cache!
1004 if (!Dep.isLocal())
1005 return Dep;
1006
1007 // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
1008 // update MemDep when we remove instructions.
1009 Instruction *Inst = Dep.getInst();
1010 assert(Inst && "Didn't depend on anything?");
1011 ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1012 ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
1013 return Dep;
1014}
1015
1016/// Sort the NonLocalDepInfo cache, given a certain number of elements in the
1017/// array that are already properly ordered.
1018///
1019/// This is optimized for the case when only a few entries are added.
1020static void
1022 unsigned NumSortedEntries) {
1023 switch (Cache.size() - NumSortedEntries) {
1024 case 0:
1025 // done, no new entries.
1026 break;
1027 case 2: {
1028 // Two new entries, insert the last one into place.
1029 NonLocalDepEntry Val = Cache.back();
1030 Cache.pop_back();
1031 MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1032 std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
1033 Cache.insert(Entry, Val);
1034 [[fallthrough]];
1035 }
1036 case 1:
1037 // One new entry, Just insert the new value at the appropriate position.
1038 if (Cache.size() != 1) {
1039 NonLocalDepEntry Val = Cache.back();
1040 Cache.pop_back();
1041 MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1042 llvm::upper_bound(Cache, Val);
1043 Cache.insert(Entry, Val);
1044 }
1045 break;
1046 default:
1047 // Added many values, do a full scale sort.
1048 llvm::sort(Cache);
1049 break;
1050 }
1051}
1052
1053/// Perform a dependency query based on pointer/pointeesize starting at the end
1054/// of StartBB.
1055///
1056/// Add any clobber/def results to the results vector and keep track of which
1057/// blocks are visited in 'Visited'.
1058///
1059/// This has special behavior for the first block queries (when SkipFirstBlock
1060/// is true). In this special case, it ignores the contents of the specified
1061/// block and starts returning dependence info for its predecessors.
1062///
1063/// This function returns true on success, or false to indicate that it could
1064/// not compute dependence information for some reason. This should be treated
1065/// as a clobber dependence on the first instruction in the predecessor block.
1066bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1067 Instruction *QueryInst, const PHITransAddr &Pointer,
1068 const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
1070 DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock,
1071 bool IsIncomplete) {
1072 // Look up the cached info for Pointer.
1073 ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1074
1075 // Set up a temporary NLPI value. If the map doesn't yet have an entry for
1076 // CacheKey, this value will be inserted as the associated value. Otherwise,
1077 // it'll be ignored, and we'll have to check to see if the cached size and
1078 // aa tags are consistent with the current query.
1079 NonLocalPointerInfo InitialNLPI;
1080 InitialNLPI.Size = Loc.Size;
1081 InitialNLPI.AATags = Loc.AATags;
1082
1083 bool isInvariantLoad = false;
1084 if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
1085 isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
1086
1087 // Get the NLPI for CacheKey, inserting one into the map if it doesn't
1088 // already have one.
1089 std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1090 NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
1091 NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1092
1093 // If we already have a cache entry for this CacheKey, we may need to do some
1094 // work to reconcile the cache entry and the current query.
1095 // Invariant loads don't participate in caching. Thus no need to reconcile.
1096 if (!isInvariantLoad && !Pair.second) {
1097 if (CacheInfo->Size != Loc.Size) {
1098 bool ThrowOutEverything;
1099 if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
1100 // FIXME: We may be able to do better in the face of results with mixed
1101 // precision. We don't appear to get them in practice, though, so just
1102 // be conservative.
1103 ThrowOutEverything =
1104 CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
1105 !TypeSize::isKnownGE(CacheInfo->Size.getValue(),
1106 Loc.Size.getValue());
1107 } else {
1108 // For our purposes, unknown size > all others.
1109 ThrowOutEverything = !Loc.Size.hasValue();
1110 }
1111
1112 if (ThrowOutEverything) {
1113 // The query's Size is greater than the cached one. Throw out the
1114 // cached data and proceed with the query at the greater size.
1115 CacheInfo->Pair = BBSkipFirstBlockPair();
1116 CacheInfo->Size = Loc.Size;
1117 for (auto &Entry : CacheInfo->NonLocalDeps)
1118 if (Instruction *Inst = Entry.getResult().getInst())
1119 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1120 CacheInfo->NonLocalDeps.clear();
1121 // The cache is cleared (in the above line) so we will have lost
1122 // information about blocks we have already visited. We therefore must
1123 // assume that the cache information is incomplete.
1124 IsIncomplete = true;
1125 } else {
1126 // This query's Size is less than the cached one. Conservatively restart
1127 // the query using the greater size.
1128 return getNonLocalPointerDepFromBB(
1129 QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
1130 StartBB, Result, Visited, SkipFirstBlock, IsIncomplete);
1131 }
1132 }
1133
1134 // If the query's AATags are inconsistent with the cached one,
1135 // conservatively throw out the cached data and restart the query with
1136 // no tag if needed.
1137 if (CacheInfo->AATags != Loc.AATags) {
1138 if (CacheInfo->AATags) {
1139 CacheInfo->Pair = BBSkipFirstBlockPair();
1140 CacheInfo->AATags = AAMDNodes();
1141 for (auto &Entry : CacheInfo->NonLocalDeps)
1142 if (Instruction *Inst = Entry.getResult().getInst())
1143 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1144 CacheInfo->NonLocalDeps.clear();
1145 // The cache is cleared (in the above line) so we will have lost
1146 // information about blocks we have already visited. We therefore must
1147 // assume that the cache information is incomplete.
1148 IsIncomplete = true;
1149 }
1150 if (Loc.AATags)
1151 return getNonLocalPointerDepFromBB(
1152 QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
1153 Visited, SkipFirstBlock, IsIncomplete);
1154 }
1155 }
1156
1157 NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1158
1159 // If we have valid cached information for exactly the block we are
1160 // investigating, just return it with no recomputation.
1161 // Don't use cached information for invariant loads since it is valid for
1162 // non-invariant loads only.
1163 if (!IsIncomplete && !isInvariantLoad &&
1164 CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1165 // We have a fully cached result for this query then we can just return the
1166 // cached results and populate the visited set. However, we have to verify
1167 // that we don't already have conflicting results for these blocks. Check
1168 // to ensure that if a block in the results set is in the visited set that
1169 // it was for the same pointer query.
1170 if (!Visited.empty()) {
1171 for (auto &Entry : *Cache) {
1173 Visited.find(Entry.getBB());
1174 if (VI == Visited.end() || VI->second == Pointer.getAddr())
1175 continue;
1176
1177 // We have a pointer mismatch in a block. Just return false, saying
1178 // that something was clobbered in this result. We could also do a
1179 // non-fully cached query, but there is little point in doing this.
1180 return false;
1181 }
1182 }
1183
1184 Value *Addr = Pointer.getAddr();
1185 for (auto &Entry : *Cache) {
1186 Visited.insert(std::make_pair(Entry.getBB(), Addr));
1187 if (Entry.getResult().isNonLocal()) {
1188 continue;
1189 }
1190
1191 if (DT.isReachableFromEntry(Entry.getBB())) {
1192 Result.push_back(
1193 NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
1194 }
1195 }
1196 ++NumCacheCompleteNonLocalPtr;
1197 return true;
1198 }
1199
1200 // Otherwise, either this is a new block, a block with an invalid cache
1201 // pointer or one that we're about to invalidate by putting more info into
1202 // it than its valid cache info. If empty and not explicitly indicated as
1203 // incomplete, the result will be valid cache info, otherwise it isn't.
1204 //
1205 // Invariant loads don't affect cache in any way thus no need to update
1206 // CacheInfo as well.
1207 if (!isInvariantLoad) {
1208 if (!IsIncomplete && Cache->empty())
1209 CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1210 else
1211 CacheInfo->Pair = BBSkipFirstBlockPair();
1212 }
1213
1215 Worklist.push_back(StartBB);
1216
1217 // PredList used inside loop.
1219
1220 // Keep track of the entries that we know are sorted. Previously cached
1221 // entries will all be sorted. The entries we add we only sort on demand (we
1222 // don't insert every element into its sorted position). We know that we
1223 // won't get any reuse from currently inserted values, because we don't
1224 // revisit blocks after we insert info for them.
1225 unsigned NumSortedEntries = Cache->size();
1226 unsigned WorklistEntries = BlockNumberLimit;
1227 bool GotWorklistLimit = false;
1228 LLVM_DEBUG(AssertSorted(*Cache));
1229
1230 BatchAAResults BatchAA(AA, &EII);
1231 while (!Worklist.empty()) {
1232 BasicBlock *BB = Worklist.pop_back_val();
1233
1234 // If we do process a large number of blocks it becomes very expensive and
1235 // likely it isn't worth worrying about
1236 if (Result.size() > NumResultsLimit) {
1237 // Sort it now (if needed) so that recursive invocations of
1238 // getNonLocalPointerDepFromBB and other routines that could reuse the
1239 // cache value will only see properly sorted cache arrays.
1240 if (Cache && NumSortedEntries != Cache->size()) {
1241 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1242 }
1243 // Since we bail out, the "Cache" set won't contain all of the
1244 // results for the query. This is ok (we can still use it to accelerate
1245 // specific block queries) but we can't do the fastpath "return all
1246 // results from the set". Clear out the indicator for this.
1247 CacheInfo->Pair = BBSkipFirstBlockPair();
1248 return false;
1249 }
1250
1251 // Skip the first block if we have it.
1252 if (!SkipFirstBlock) {
1253 // Analyze the dependency of *Pointer in FromBB. See if we already have
1254 // been here.
1255 assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
1256
1257 // Get the dependency info for Pointer in BB. If we have cached
1258 // information, we will use it, otherwise we compute it.
1259 LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
1260 MemDepResult Dep = getNonLocalInfoForBlock(
1261 QueryInst, Loc, isLoad, BB, Cache, NumSortedEntries, BatchAA);
1262
1263 // If we got a Def or Clobber, add this to the list of results.
1264 if (!Dep.isNonLocal()) {
1265 if (DT.isReachableFromEntry(BB)) {
1266 Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
1267 continue;
1268 }
1269 }
1270 }
1271
1272 // If 'Pointer' is an instruction defined in this block, then we need to do
1273 // phi translation to change it into a value live in the predecessor block.
1274 // If not, we just add the predecessors to the worklist and scan them with
1275 // the same Pointer.
1276 if (!Pointer.needsPHITranslationFromBlock(BB)) {
1277 SkipFirstBlock = false;
1279 for (BasicBlock *Pred : PredCache.get(BB)) {
1280 // Verify that we haven't looked at this block yet.
1281 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1282 Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
1283 if (InsertRes.second) {
1284 // First time we've looked at *PI.
1285 NewBlocks.push_back(Pred);
1286 continue;
1287 }
1288
1289 // If we have seen this block before, but it was with a different
1290 // pointer then we have a phi translation failure and we have to treat
1291 // this as a clobber.
1292 if (InsertRes.first->second != Pointer.getAddr()) {
1293 // Make sure to clean up the Visited map before continuing on to
1294 // PredTranslationFailure.
1295 for (auto *NewBlock : NewBlocks)
1296 Visited.erase(NewBlock);
1297 goto PredTranslationFailure;
1298 }
1299 }
1300 if (NewBlocks.size() > WorklistEntries) {
1301 // Make sure to clean up the Visited map before continuing on to
1302 // PredTranslationFailure.
1303 for (auto *NewBlock : NewBlocks)
1304 Visited.erase(NewBlock);
1305 GotWorklistLimit = true;
1306 goto PredTranslationFailure;
1307 }
1308 WorklistEntries -= NewBlocks.size();
1309 Worklist.append(NewBlocks.begin(), NewBlocks.end());
1310 continue;
1311 }
1312
1313 // We do need to do phi translation, if we know ahead of time we can't phi
1314 // translate this value, don't even try.
1315 if (!Pointer.isPotentiallyPHITranslatable())
1316 goto PredTranslationFailure;
1317
1318 // We may have added values to the cache list before this PHI translation.
1319 // If so, we haven't done anything to ensure that the cache remains sorted.
1320 // Sort it now (if needed) so that recursive invocations of
1321 // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1322 // value will only see properly sorted cache arrays.
1323 if (Cache && NumSortedEntries != Cache->size()) {
1324 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1325 NumSortedEntries = Cache->size();
1326 }
1327 Cache = nullptr;
1328
1329 PredList.clear();
1330 for (BasicBlock *Pred : PredCache.get(BB)) {
1331 PredList.push_back(std::make_pair(Pred, Pointer));
1332
1333 // Get the PHI translated pointer in this predecessor. This can fail if
1334 // not translatable, in which case the getAddr() returns null.
1335 PHITransAddr &PredPointer = PredList.back().second;
1336 Value *PredPtrVal =
1337 PredPointer.translateValue(BB, Pred, &DT, /*MustDominate=*/false);
1338
1339 // Check to see if we have already visited this pred block with another
1340 // pointer. If so, we can't do this lookup. This failure can occur
1341 // with PHI translation when a critical edge exists and the PHI node in
1342 // the successor translates to a pointer value different than the
1343 // pointer the block was first analyzed with.
1344 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1345 Visited.insert(std::make_pair(Pred, PredPtrVal));
1346
1347 if (!InsertRes.second) {
1348 // We found the pred; take it off the list of preds to visit.
1349 PredList.pop_back();
1350
1351 // If the predecessor was visited with PredPtr, then we already did
1352 // the analysis and can ignore it.
1353 if (InsertRes.first->second == PredPtrVal)
1354 continue;
1355
1356 // Otherwise, the block was previously analyzed with a different
1357 // pointer. We can't represent the result of this case, so we just
1358 // treat this as a phi translation failure.
1359
1360 // Make sure to clean up the Visited map before continuing on to
1361 // PredTranslationFailure.
1362 for (const auto &Pred : PredList)
1363 Visited.erase(Pred.first);
1364
1365 goto PredTranslationFailure;
1366 }
1367 }
1368
1369 // Actually process results here; this need to be a separate loop to avoid
1370 // calling getNonLocalPointerDepFromBB for blocks we don't want to return
1371 // any results for. (getNonLocalPointerDepFromBB will modify our
1372 // datastructures in ways the code after the PredTranslationFailure label
1373 // doesn't expect.)
1374 for (auto &I : PredList) {
1375 BasicBlock *Pred = I.first;
1376 PHITransAddr &PredPointer = I.second;
1377 Value *PredPtrVal = PredPointer.getAddr();
1378
1379 bool CanTranslate = true;
1380 // If PHI translation was unable to find an available pointer in this
1381 // predecessor, then we have to assume that the pointer is clobbered in
1382 // that predecessor. We can still do PRE of the load, which would insert
1383 // a computation of the pointer in this predecessor.
1384 if (!PredPtrVal)
1385 CanTranslate = false;
1386
1387 // FIXME: it is entirely possible that PHI translating will end up with
1388 // the same value. Consider PHI translating something like:
1389 // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
1390 // to recurse here, pedantically speaking.
1391
1392 // If getNonLocalPointerDepFromBB fails here, that means the cached
1393 // result conflicted with the Visited list; we have to conservatively
1394 // assume it is unknown, but this also does not block PRE of the load.
1395 if (!CanTranslate ||
1396 !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1397 Loc.getWithNewPtr(PredPtrVal), isLoad,
1398 Pred, Result, Visited)) {
1399 // Add the entry to the Result list.
1401 Result.push_back(Entry);
1402
1403 // Since we had a phi translation failure, the cache for CacheKey won't
1404 // include all of the entries that we need to immediately satisfy future
1405 // queries. Mark this in NonLocalPointerDeps by setting the
1406 // BBSkipFirstBlockPair pointer to null. This requires reuse of the
1407 // cached value to do more work but not miss the phi trans failure.
1408 NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1409 NLPI.Pair = BBSkipFirstBlockPair();
1410 continue;
1411 }
1412 }
1413
1414 // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1415 CacheInfo = &NonLocalPointerDeps[CacheKey];
1416 Cache = &CacheInfo->NonLocalDeps;
1417 NumSortedEntries = Cache->size();
1418
1419 // Since we did phi translation, the "Cache" set won't contain all of the
1420 // results for the query. This is ok (we can still use it to accelerate
1421 // specific block queries) but we can't do the fastpath "return all
1422 // results from the set" Clear out the indicator for this.
1423 CacheInfo->Pair = BBSkipFirstBlockPair();
1424 SkipFirstBlock = false;
1425 continue;
1426
1427 PredTranslationFailure:
1428 // The following code is "failure"; we can't produce a sane translation
1429 // for the given block. It assumes that we haven't modified any of
1430 // our datastructures while processing the current block.
1431
1432 if (!Cache) {
1433 // Refresh the CacheInfo/Cache pointer if it got invalidated.
1434 CacheInfo = &NonLocalPointerDeps[CacheKey];
1435 Cache = &CacheInfo->NonLocalDeps;
1436 NumSortedEntries = Cache->size();
1437 }
1438
1439 // Since we failed phi translation, the "Cache" set won't contain all of the
1440 // results for the query. This is ok (we can still use it to accelerate
1441 // specific block queries) but we can't do the fastpath "return all
1442 // results from the set". Clear out the indicator for this.
1443 CacheInfo->Pair = BBSkipFirstBlockPair();
1444
1445 // If *nothing* works, mark the pointer as unknown.
1446 //
1447 // If this is the magic first block, return this as a clobber of the whole
1448 // incoming value. Since we can't phi translate to one of the predecessors,
1449 // we have to bail out.
1450 if (SkipFirstBlock)
1451 return false;
1452
1453 // Results of invariant loads are not cached thus no need to update cached
1454 // information.
1455 if (!isInvariantLoad) {
1456 for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
1457 if (I.getBB() != BB)
1458 continue;
1459
1460 assert((GotWorklistLimit || I.getResult().isNonLocal() ||
1461 !DT.isReachableFromEntry(BB)) &&
1462 "Should only be here with transparent block");
1463
1464 I.setResult(MemDepResult::getUnknown());
1465
1466
1467 break;
1468 }
1469 }
1470 (void)GotWorklistLimit;
1471 // Go ahead and report unknown dependence.
1472 Result.push_back(
1474 }
1475
1476 // Okay, we're done now. If we added new values to the cache, re-sort it.
1477 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1478 LLVM_DEBUG(AssertSorted(*Cache));
1479 return true;
1480}
1481
1482/// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
1483void MemoryDependenceResults::removeCachedNonLocalPointerDependencies(
1484 ValueIsLoadPair P) {
1485
1486 // Most of the time this cache is empty.
1487 if (!NonLocalDefsCache.empty()) {
1488 auto it = NonLocalDefsCache.find(P.getPointer());
1489 if (it != NonLocalDefsCache.end()) {
1490 RemoveFromReverseMap(ReverseNonLocalDefsCache,
1491 it->second.getResult().getInst(), P.getPointer());
1492 NonLocalDefsCache.erase(it);
1493 }
1494
1495 if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
1496 auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
1497 if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
1498 for (const auto *entry : toRemoveIt->second)
1499 NonLocalDefsCache.erase(entry);
1500 ReverseNonLocalDefsCache.erase(toRemoveIt);
1501 }
1502 }
1503 }
1504
1505 CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
1506 if (It == NonLocalPointerDeps.end())
1507 return;
1508
1509 // Remove all of the entries in the BB->val map. This involves removing
1510 // instructions from the reverse map.
1511 NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
1512
1513 for (const NonLocalDepEntry &DE : PInfo) {
1514 Instruction *Target = DE.getResult().getInst();
1515 if (!Target)
1516 continue; // Ignore non-local dep results.
1517 assert(Target->getParent() == DE.getBB());
1518
1519 // Eliminating the dirty entry from 'Cache', so update the reverse info.
1520 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1521 }
1522
1523 // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1524 NonLocalPointerDeps.erase(It);
1525}
1526
1528 // If Ptr isn't really a pointer, just ignore it.
1529 if (!Ptr->getType()->isPointerTy())
1530 return;
1531 // Flush store info for the pointer.
1532 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1533 // Flush load info for the pointer.
1534 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1535}
1536
1538 PredCache.clear();
1539}
1540
1542 EII.removeInstruction(RemInst);
1543
1544 // Walk through the Non-local dependencies, removing this one as the value
1545 // for any cached queries.
1546 NonLocalDepMapType::iterator NLDI = NonLocalDepsMap.find(RemInst);
1547 if (NLDI != NonLocalDepsMap.end()) {
1548 NonLocalDepInfo &BlockMap = NLDI->second.first;
1549 for (auto &Entry : BlockMap)
1550 if (Instruction *Inst = Entry.getResult().getInst())
1551 RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1552 NonLocalDepsMap.erase(NLDI);
1553 }
1554
1555 // If we have a cached local dependence query for this instruction, remove it.
1556 LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1557 if (LocalDepEntry != LocalDeps.end()) {
1558 // Remove us from DepInst's reverse set now that the local dep info is gone.
1559 if (Instruction *Inst = LocalDepEntry->second.getInst())
1560 RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1561
1562 // Remove this local dependency info.
1563 LocalDeps.erase(LocalDepEntry);
1564 }
1565
1566 // If we have any cached dependencies on this instruction, remove
1567 // them.
1568
1569 // If the instruction is a pointer, remove it from both the load info and the
1570 // store info.
1571 if (RemInst->getType()->isPointerTy()) {
1572 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1573 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1574 } else {
1575 // Otherwise, if the instructions is in the map directly, it must be a load.
1576 // Remove it.
1577 auto toRemoveIt = NonLocalDefsCache.find(RemInst);
1578 if (toRemoveIt != NonLocalDefsCache.end()) {
1579 assert(isa<LoadInst>(RemInst) &&
1580 "only load instructions should be added directly");
1581 const Instruction *DepV = toRemoveIt->second.getResult().getInst();
1582 ReverseNonLocalDefsCache.find(DepV)->second.erase(RemInst);
1583 NonLocalDefsCache.erase(toRemoveIt);
1584 }
1585 }
1586
1587 // Loop over all of the things that depend on the instruction we're removing.
1589
1590 // If we find RemInst as a clobber or Def in any of the maps for other values,
1591 // we need to replace its entry with a dirty version of the instruction after
1592 // it. If RemInst is a terminator, we use a null dirty value.
1593 //
1594 // Using a dirty version of the instruction after RemInst saves having to scan
1595 // the entire block to get to this point.
1596 MemDepResult NewDirtyVal;
1597 if (!RemInst->isTerminator())
1598 NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
1599
1600 ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1601 if (ReverseDepIt != ReverseLocalDeps.end()) {
1602 // RemInst can't be the terminator if it has local stuff depending on it.
1603 assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
1604 "Nothing can locally depend on a terminator");
1605
1606 for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
1607 assert(InstDependingOnRemInst != RemInst &&
1608 "Already removed our local dep info");
1609
1610 LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1611
1612 // Make sure to remember that new things depend on NewDepInst.
1613 assert(NewDirtyVal.getInst() &&
1614 "There is no way something else can have "
1615 "a local dep on this if it is a terminator!");
1616 ReverseDepsToAdd.push_back(
1617 std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
1618 }
1619
1620 ReverseLocalDeps.erase(ReverseDepIt);
1621
1622 // Add new reverse deps after scanning the set, to avoid invalidating the
1623 // 'ReverseDeps' reference.
1624 while (!ReverseDepsToAdd.empty()) {
1625 ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
1626 ReverseDepsToAdd.back().second);
1627 ReverseDepsToAdd.pop_back();
1628 }
1629 }
1630
1631 ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1632 if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1633 for (Instruction *I : ReverseDepIt->second) {
1634 assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1635
1636 PerInstNLInfo &INLD = NonLocalDepsMap[I];
1637 // The information is now dirty!
1638 INLD.second = true;
1639
1640 for (auto &Entry : INLD.first) {
1641 if (Entry.getResult().getInst() != RemInst)
1642 continue;
1643
1644 // Convert to a dirty entry for the subsequent instruction.
1645 Entry.setResult(NewDirtyVal);
1646
1647 if (Instruction *NextI = NewDirtyVal.getInst())
1648 ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
1649 }
1650 }
1651
1652 ReverseNonLocalDeps.erase(ReverseDepIt);
1653
1654 // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
1655 while (!ReverseDepsToAdd.empty()) {
1656 ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
1657 ReverseDepsToAdd.back().second);
1658 ReverseDepsToAdd.pop_back();
1659 }
1660 }
1661
1662 // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1663 // value in the NonLocalPointerDeps info.
1664 ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1665 ReverseNonLocalPtrDeps.find(RemInst);
1666 if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1668 ReversePtrDepsToAdd;
1669
1670 for (ValueIsLoadPair P : ReversePtrDepIt->second) {
1671 assert(P.getPointer() != RemInst &&
1672 "Already removed NonLocalPointerDeps info for RemInst");
1673
1674 NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
1675
1676 // The cache is not valid for any specific block anymore.
1677 NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
1678
1679 // Update any entries for RemInst to use the instruction after it.
1680 for (auto &Entry : NLPDI) {
1681 if (Entry.getResult().getInst() != RemInst)
1682 continue;
1683
1684 // Convert to a dirty entry for the subsequent instruction.
1685 Entry.setResult(NewDirtyVal);
1686
1687 if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1688 ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1689 }
1690
1691 // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
1692 // subsequent value may invalidate the sortedness.
1693 llvm::sort(NLPDI);
1694 }
1695
1696 ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1697
1698 while (!ReversePtrDepsToAdd.empty()) {
1699 ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
1700 ReversePtrDepsToAdd.back().second);
1701 ReversePtrDepsToAdd.pop_back();
1702 }
1703 }
1704
1705 assert(!NonLocalDepsMap.count(RemInst) && "RemInst got reinserted?");
1706 LLVM_DEBUG(verifyRemoved(RemInst));
1707}
1708
1709/// Verify that the specified instruction does not occur in our internal data
1710/// structures.
1711///
1712/// This function verifies by asserting in debug builds.
1713void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
1714#ifndef NDEBUG
1715 for (const auto &DepKV : LocalDeps) {
1716 assert(DepKV.first != D && "Inst occurs in data structures");
1717 assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1718 }
1719
1720 for (const auto &DepKV : NonLocalPointerDeps) {
1721 assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1722 for (const auto &Entry : DepKV.second.NonLocalDeps)
1723 assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1724 }
1725
1726 for (const auto &DepKV : NonLocalDepsMap) {
1727 assert(DepKV.first != D && "Inst occurs in data structures");
1728 const PerInstNLInfo &INLD = DepKV.second;
1729 for (const auto &Entry : INLD.first)
1730 assert(Entry.getResult().getInst() != D &&
1731 "Inst occurs in data structures");
1732 }
1733
1734 for (const auto &DepKV : ReverseLocalDeps) {
1735 assert(DepKV.first != D && "Inst occurs in data structures");
1736 for (Instruction *Inst : DepKV.second)
1737 assert(Inst != D && "Inst occurs in data structures");
1738 }
1739
1740 for (const auto &DepKV : ReverseNonLocalDeps) {
1741 assert(DepKV.first != D && "Inst occurs in data structures");
1742 for (Instruction *Inst : DepKV.second)
1743 assert(Inst != D && "Inst occurs in data structures");
1744 }
1745
1746 for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1747 assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1748
1749 for (ValueIsLoadPair P : DepKV.second)
1750 assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1751 "Inst occurs in ReverseNonLocalPtrDeps map");
1752 }
1753#endif
1754}
1755
1756AnalysisKey MemoryDependenceAnalysis::Key;
1757
1759 : DefaultBlockScanLimit(BlockScanLimit) {}
1760
1763 auto &AA = AM.getResult<AAManager>(F);
1764 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1765 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1766 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1767 return MemoryDependenceResults(AA, AC, TLI, DT, DefaultBlockScanLimit);
1768}
1769
1771
1773 "Memory Dependence Analysis", false, true)
1780
1783}
1784
1786
1788 MemDep.reset();
1789}
1790
1792 AU.setPreservesAll();
1797}
1798
1801 // Check whether our analysis is preserved.
1802 auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1803 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1804 // If not, give up now.
1805 return true;
1806
1807 // Check whether the analyses we depend on became invalid for any reason.
1808 if (Inv.invalidate<AAManager>(F, PA) ||
1809 Inv.invalidate<AssumptionAnalysis>(F, PA) ||
1811 return true;
1812
1813 // Otherwise this analysis result remains valid.
1814 return false;
1815}
1816
1818 return DefaultBlockScanLimit;
1819}
1820
1822 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1823 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1824 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1825 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1826 MemDep.emplace(AA, AC, TLI, DT, BlockScanLimit);
1827 return false;
1828}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isLoad(int Opcode)
Atomic ordering constants.
basic Basic Alias true
block Block Frequency Analysis
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
Hexagon Common GEP
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static const unsigned int NumResultsLimit
static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, const TargetLibraryInfo &TLI)
If the given instruction references a specific memory location, fill in Loc with the details,...
static cl::opt< unsigned > BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(200), cl::desc("The number of blocks to scan during memory " "dependency analysis (default = 200)"))
static void RemoveFromReverseMap(DenseMap< Instruction *, SmallPtrSet< KeyTy, 4 > > &ReverseMap, Instruction *Inst, KeyTy Val)
This is a helper function that removes Val from 'Inst's set in ReverseMap.
static void SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache, unsigned NumSortedEntries)
Sort the NonLocalDepInfo cache, given a certain number of elements in the array that are already prop...
static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache, int Count=-1)
This method is used when -debug is specified to verify that cache arrays are properly kept sorted.
static bool canSkipClobberingStore(const StoreInst *SI, const MemoryLocation &MemLoc, Align MemLocAlign, BatchAAResults &BatchAA, unsigned ScanLimit)
static cl::opt< unsigned > BlockScanLimit("memdep-block-scan-limit", cl::Hidden, cl::init(100), cl::desc("The number of instructions to scan in a block in memory " "dependency analysis (default = 100)"))
This file provides utility analysis objects describing memory locations.
static bool isOrdered(const Instruction *I)
Definition: MemorySSA.cpp:1745
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This defines the Use class.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
bool onlyReadsMemory(const CallBase *Call)
Checks if the specified call is known to only read from non-volatile memory (or not access memory at ...
The possible results of an alias query.
Definition: AliasAnalysis.h:81
@ NoAlias
The two locations do not alias at all.
Definition: AliasAnalysis.h:99
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: Analysis.h:47
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:360
bool invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Trigger the invalidation of some other analysis pass if not already handled and return whether it was...
Definition: PassManager.h:378
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
AnalysisUsage & addRequiredTransitive()
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:445
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:432
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:208
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:166
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:290
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, bool IgnoreLocals=false)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1494
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:345
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition: DenseMap.h:71
bool empty() const
Definition: DenseMap.h:98
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Dependence - This class represents a dependence between two memory memory references in a function.
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
void removeInstruction(Instruction *I)
An instruction for ordering other memory operations.
Definition: Instructions.h:461
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
const BasicBlock & getEntryBlock() const
Definition: Function.h:790
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
Definition: Instruction.h:340
bool isTerminator() const
Definition: Instruction.h:254
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
bool isVolatile() const LLVM_READONLY
Return true if this instruction has a volatile memory access.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
An instruction for reading from memory.
Definition: Instructions.h:185
Value * getPointerOperand()
Definition: Instructions.h:281
bool hasValue() const
bool isScalable() const
TypeSize getValue() const
bool isPrecise() const
A memory dependence query can return one of three different answers.
bool isNonLocal() const
Tests if this MemDepResult represents a query that is transparent to the start of the block,...
static MemDepResult getNonLocal()
bool isNonFuncLocal() const
Tests if this MemDepResult represents a query that is transparent to the start of the function.
static MemDepResult getClobber(Instruction *Inst)
bool isDef() const
Tests if this MemDepResult represents a query that is an instruction definition dependency.
static MemDepResult getUnknown()
bool isLocal() const
Tests if this MemDepResult represents a valid local query (Clobber/Def).
bool isUnknown() const
Tests if this MemDepResult represents a query which cannot and/or will not be computed.
static MemDepResult getNonFuncLocal()
static MemDepResult getDef(Instruction *Inst)
get methods: These are static ctor methods for creating various MemDepResult kinds.
Instruction * getInst() const
If this is a normal dependency, returns the instruction that is depended on.
An analysis that produces MemoryDependenceResults for a function.
MemoryDependenceResults run(Function &F, FunctionAnalysisManager &AM)
Provides a lazy, caching interface for making common memory aliasing information queries,...
MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst, unsigned *Limit, BatchAAResults &BatchAA)
std::vector< NonLocalDepEntry > NonLocalDepInfo
void invalidateCachedPredecessors()
Clears the PredIteratorCache info.
void invalidateCachedPointerInfo(Value *Ptr)
Invalidates cached information about the specified pointer, because it may be too conservative in mem...
MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst=nullptr, unsigned *Limit=nullptr)
Returns the instruction on which a memory location depends.
void removeInstruction(Instruction *InstToRemove)
Removes an instruction from the dependence analysis, updating the dependence of instructions that pre...
MemDepResult getInvariantGroupPointerDependency(LoadInst *LI, BasicBlock *BB)
This analysis looks for other loads and stores with invariant.group metadata and the same pointer ope...
unsigned getDefaultBlockScanLimit() const
Some methods limit the number of instructions they will examine.
MemDepResult getDependency(Instruction *QueryInst)
Returns the instruction on which a memory operation depends.
const NonLocalDepInfo & getNonLocalCallDependency(CallBase *QueryCall)
Perform a full dependency query for the specified call, returning the set of blocks that the value is...
void getNonLocalPointerDependency(Instruction *QueryInst, SmallVectorImpl< NonLocalDepResult > &Result)
Perform a full dependency query for an access to the QueryInst's specified memory location,...
bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)
Handle invalidation in the new PM.
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDepnedenceResults instance.
bool runOnFunction(Function &) override
Pass Implementation stuff. This doesn't do any analysis eagerly.
void getAnalysisUsage(AnalysisUsage &AU) const override
Does not modify anything. It uses Value Numbering and Alias Analysis.
void releaseMemory() override
Clean up memory in between runs.
Representation for a specific memory location.
MemoryLocation getWithNewSize(LocationSize NewSize) const
MemoryLocation getWithoutAATags() const
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
MemoryLocation getWithNewPtr(const Value *NewPtr) const
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
const Value * Ptr
The address of the start of the location.
static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
This is an entry in the NonLocalDepInfo cache.
void setResult(const MemDepResult &R)
const MemDepResult & getResult() const
This is a result from a NonLocal dependence query.
PHITransAddr - An address value which tracks and handles phi translation.
Definition: PHITransAddr.h:35
Value * translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
translateValue - PHI translate the current address up the CFG from CurBB to Pred, updating our state ...
Value * getAddr() const
Definition: PHITransAddr.h:58
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void clear()
clear - Remove all information.
ArrayRef< BasicBlock * > get(BasicBlock *BB)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition: Analysis.h:264
size_type size() const
Definition: SmallPtrSet.h:94
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:318
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Target - Wrapper for Target specific information.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
This class represents the va_arg llvm instruction, which returns an argument of the specified type gi...
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:693
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
static constexpr bool isKnownGE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:236
const ParentPtrTy getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
This class provides various memory handling functions that manipulate MemoryBlock instances.
Definition: Memory.h:52
@ Entry
Definition: COFF.h:811
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void initializeMemoryDependenceWrapperPassPass(PassRegistry &)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2067
bool isStrongerThanUnordered(AtomicOrdering AO)
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
auto upper_bound(R &&Range, T &&Value)
Provide wrappers to std::upper_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1967
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
bool isModOrRefSet(const ModRefInfo MRI)
Definition: ModRef.h:42
AtomicOrdering
Atomic ordering for LLVM's memory model.
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition: ModRef.h:27
@ Ref
The access may reference the value stored in memory.
@ Mod
The access may modify the value stored in memory.
@ NoModRef
The access neither references nor modifies the value stored in memory.
@ Other
Any other memory.
Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
bool isNoModRef(const ModRefInfo MRI)
Definition: ModRef.h:39
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:26