LLVM 17.0.0git
MemoryDependenceAnalysis.cpp
Go to the documentation of this file.
1//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements an analysis that determines, for a given memory
10// operation, what preceding memory operations it depends on. It builds on
11// alias analysis information, and tries to provide a lazy, caching interface to
12// a common kind of alias information query.
13//
14//===----------------------------------------------------------------------===//
15
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/InstrTypes.h"
33#include "llvm/IR/Instruction.h"
36#include "llvm/IR/LLVMContext.h"
37#include "llvm/IR/Metadata.h"
38#include "llvm/IR/Module.h"
40#include "llvm/IR/Type.h"
41#include "llvm/IR/Use.h"
42#include "llvm/IR/Value.h"
44#include "llvm/Pass.h"
49#include "llvm/Support/Debug.h"
50#include <algorithm>
51#include <cassert>
52#include <iterator>
53#include <utility>
54
55using namespace llvm;
56
57#define DEBUG_TYPE "memdep"
58
59STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
60STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
61STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
62
63STATISTIC(NumCacheNonLocalPtr,
64 "Number of fully cached non-local ptr responses");
65STATISTIC(NumCacheDirtyNonLocalPtr,
66 "Number of cached, but dirty, non-local ptr responses");
67STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
68STATISTIC(NumCacheCompleteNonLocalPtr,
69 "Number of block queries that were completely cached");
70
71// Limit for the number of instructions to scan in a block.
72
74 "memdep-block-scan-limit", cl::Hidden, cl::init(100),
75 cl::desc("The number of instructions to scan in a block in memory "
76 "dependency analysis (default = 100)"));
77
79 BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(200),
80 cl::desc("The number of blocks to scan during memory "
81 "dependency analysis (default = 200)"));
82
83// Limit on the number of memdep results to process.
84static const unsigned int NumResultsLimit = 100;
85
86/// This is a helper function that removes Val from 'Inst's set in ReverseMap.
87///
88/// If the set becomes empty, remove Inst's entry.
89template <typename KeyTy>
90static void
92 Instruction *Inst, KeyTy Val) {
93 typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
94 ReverseMap.find(Inst);
95 assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
96 bool Found = InstIt->second.erase(Val);
97 assert(Found && "Invalid reverse map!");
98 (void)Found;
99 if (InstIt->second.empty())
100 ReverseMap.erase(InstIt);
101}
102
103/// If the given instruction references a specific memory location, fill in Loc
104/// with the details, otherwise set Loc.Ptr to null.
105///
106/// Returns a ModRefInfo value describing the general behavior of the
107/// instruction.
109 const TargetLibraryInfo &TLI) {
110 if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
111 if (LI->isUnordered()) {
112 Loc = MemoryLocation::get(LI);
113 return ModRefInfo::Ref;
114 }
115 if (LI->getOrdering() == AtomicOrdering::Monotonic) {
116 Loc = MemoryLocation::get(LI);
117 return ModRefInfo::ModRef;
118 }
119 Loc = MemoryLocation();
120 return ModRefInfo::ModRef;
121 }
122
123 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
124 if (SI->isUnordered()) {
125 Loc = MemoryLocation::get(SI);
126 return ModRefInfo::Mod;
127 }
128 if (SI->getOrdering() == AtomicOrdering::Monotonic) {
129 Loc = MemoryLocation::get(SI);
130 return ModRefInfo::ModRef;
131 }
132 Loc = MemoryLocation();
133 return ModRefInfo::ModRef;
134 }
135
136 if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
137 Loc = MemoryLocation::get(V);
138 return ModRefInfo::ModRef;
139 }
140
141 if (const CallBase *CB = dyn_cast<CallBase>(Inst)) {
142 if (Value *FreedOp = getFreedOperand(CB, &TLI)) {
143 // calls to free() deallocate the entire structure
144 Loc = MemoryLocation::getAfter(FreedOp);
145 return ModRefInfo::Mod;
146 }
147 }
148
149 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
150 switch (II->getIntrinsicID()) {
151 case Intrinsic::lifetime_start:
152 case Intrinsic::lifetime_end:
153 case Intrinsic::invariant_start:
154 Loc = MemoryLocation::getForArgument(II, 1, TLI);
155 // These intrinsics don't really modify the memory, but returning Mod
156 // will allow them to be handled conservatively.
157 return ModRefInfo::Mod;
158 case Intrinsic::invariant_end:
159 Loc = MemoryLocation::getForArgument(II, 2, TLI);
160 // These intrinsics don't really modify the memory, but returning Mod
161 // will allow them to be handled conservatively.
162 return ModRefInfo::Mod;
163 case Intrinsic::masked_load:
164 Loc = MemoryLocation::getForArgument(II, 0, TLI);
165 return ModRefInfo::Ref;
166 case Intrinsic::masked_store:
167 Loc = MemoryLocation::getForArgument(II, 1, TLI);
168 return ModRefInfo::Mod;
169 default:
170 break;
171 }
172 }
173
174 // Otherwise, just do the coarse-grained thing that always works.
175 if (Inst->mayWriteToMemory())
176 return ModRefInfo::ModRef;
177 if (Inst->mayReadFromMemory())
178 return ModRefInfo::Ref;
179 return ModRefInfo::NoModRef;
180}
181
182/// Private helper for finding the local dependencies of a call site.
183MemDepResult MemoryDependenceResults::getCallDependencyFrom(
184 CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
185 BasicBlock *BB) {
186 unsigned Limit = getDefaultBlockScanLimit();
187
188 // Walk backwards through the block, looking for dependencies.
189 while (ScanIt != BB->begin()) {
190 Instruction *Inst = &*--ScanIt;
191 // Debug intrinsics don't cause dependences and should not affect Limit
192 if (isa<DbgInfoIntrinsic>(Inst))
193 continue;
194
195 // Limit the amount of scanning we do so we don't end up with quadratic
196 // running time on extreme testcases.
197 --Limit;
198 if (!Limit)
200
201 // If this inst is a memory op, get the pointer it accessed
202 MemoryLocation Loc;
203 ModRefInfo MR = GetLocation(Inst, Loc, TLI);
204 if (Loc.Ptr) {
205 // A simple instruction.
206 if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
207 return MemDepResult::getClobber(Inst);
208 continue;
209 }
210
211 if (auto *CallB = dyn_cast<CallBase>(Inst)) {
212 // If these two calls do not interfere, look past it.
213 if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
214 // If the two calls are the same, return Inst as a Def, so that
215 // Call can be found redundant and eliminated.
216 if (isReadOnlyCall && !isModSet(MR) &&
217 Call->isIdenticalToWhenDefined(CallB))
218 return MemDepResult::getDef(Inst);
219
220 // Otherwise if the two calls don't interact (e.g. CallB is readnone)
221 // keep scanning.
222 continue;
223 } else
224 return MemDepResult::getClobber(Inst);
225 }
226
227 // If we could not obtain a pointer for the instruction and the instruction
228 // touches memory then assume that this is a dependency.
229 if (isModOrRefSet(MR))
230 return MemDepResult::getClobber(Inst);
231 }
232
233 // No dependence found. If this is the entry block of the function, it is
234 // unknown, otherwise it is non-local.
235 if (BB != &BB->getParent()->getEntryBlock())
238}
239
241 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
242 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
243 BatchAAResults &BatchAA) {
244 MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
245 if (QueryInst != nullptr) {
246 if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
247 InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
248
249 if (InvariantGroupDependency.isDef())
250 return InvariantGroupDependency;
251 }
252 }
254 MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, BatchAA);
255 if (SimpleDep.isDef())
256 return SimpleDep;
257 // Non-local invariant group dependency indicates there is non local Def
258 // (it only returns nonLocal if it finds nonLocal def), which is better than
259 // local clobber and everything else.
260 if (InvariantGroupDependency.isNonLocal())
261 return InvariantGroupDependency;
262
263 assert(InvariantGroupDependency.isUnknown() &&
264 "InvariantGroupDependency should be only unknown at this point");
265 return SimpleDep;
266}
267
269 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
270 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
271 BatchAAResults BatchAA(AA);
272 return getPointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, Limit,
273 BatchAA);
274}
275
278 BasicBlock *BB) {
279
280 if (!LI->hasMetadata(LLVMContext::MD_invariant_group))
282
283 // Take the ptr operand after all casts and geps 0. This way we can search
284 // cast graph down only.
285 Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
286
287 // It's is not safe to walk the use list of global value, because function
288 // passes aren't allowed to look outside their functions.
289 // FIXME: this could be fixed by filtering instructions from outside
290 // of current function.
291 if (isa<GlobalValue>(LoadOperand))
293
294 // Queue to process all pointers that are equivalent to load operand.
295 SmallVector<const Value *, 8> LoadOperandsQueue;
296 LoadOperandsQueue.push_back(LoadOperand);
297
298 Instruction *ClosestDependency = nullptr;
299 // Order of instructions in uses list is unpredictible. In order to always
300 // get the same result, we will look for the closest dominance.
301 auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
302 assert(Other && "Must call it with not null instruction");
303 if (Best == nullptr || DT.dominates(Best, Other))
304 return Other;
305 return Best;
306 };
307
308 // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
309 // we will see all the instructions. This should be fixed in MSSA.
310 while (!LoadOperandsQueue.empty()) {
311 const Value *Ptr = LoadOperandsQueue.pop_back_val();
312 assert(Ptr && !isa<GlobalValue>(Ptr) &&
313 "Null or GlobalValue should not be inserted");
314
315 for (const Use &Us : Ptr->uses()) {
316 auto *U = dyn_cast<Instruction>(Us.getUser());
317 if (!U || U == LI || !DT.dominates(U, LI))
318 continue;
319
320 // Bitcast or gep with zeros are using Ptr. Add to queue to check it's
321 // users. U = bitcast Ptr
322 if (isa<BitCastInst>(U)) {
323 LoadOperandsQueue.push_back(U);
324 continue;
325 }
326 // Gep with zeros is equivalent to bitcast.
327 // FIXME: we are not sure if some bitcast should be canonicalized to gep 0
328 // or gep 0 to bitcast because of SROA, so there are 2 forms. When
329 // typeless pointers will be ready then both cases will be gone
330 // (and this BFS also won't be needed).
331 if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
332 if (GEP->hasAllZeroIndices()) {
333 LoadOperandsQueue.push_back(U);
334 continue;
335 }
336
337 // If we hit load/store with the same invariant.group metadata (and the
338 // same pointer operand) we can assume that value pointed by pointer
339 // operand didn't change.
340 if ((isa<LoadInst>(U) ||
341 (isa<StoreInst>(U) &&
342 cast<StoreInst>(U)->getPointerOperand() == Ptr)) &&
343 U->hasMetadata(LLVMContext::MD_invariant_group))
344 ClosestDependency = GetClosestDependency(ClosestDependency, U);
345 }
346 }
347
348 if (!ClosestDependency)
350 if (ClosestDependency->getParent() == BB)
351 return MemDepResult::getDef(ClosestDependency);
352 // Def(U) can't be returned here because it is non-local. If local
353 // dependency won't be found then return nonLocal counting that the
354 // user will call getNonLocalPointerDependency, which will return cached
355 // result.
356 NonLocalDefsCache.try_emplace(
357 LI, NonLocalDepResult(ClosestDependency->getParent(),
358 MemDepResult::getDef(ClosestDependency), nullptr));
359 ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
361}
362
364 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
365 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
366 BatchAAResults &BatchAA) {
367 bool isInvariantLoad = false;
368
369 unsigned DefaultLimit = getDefaultBlockScanLimit();
370 if (!Limit)
371 Limit = &DefaultLimit;
372
373 // We must be careful with atomic accesses, as they may allow another thread
374 // to touch this location, clobbering it. We are conservative: if the
375 // QueryInst is not a simple (non-atomic) memory access, we automatically
376 // return getClobber.
377 // If it is simple, we know based on the results of
378 // "Compiler testing via a theory of sound optimisations in the C11/C++11
379 // memory model" in PLDI 2013, that a non-atomic location can only be
380 // clobbered between a pair of a release and an acquire action, with no
381 // access to the location in between.
382 // Here is an example for giving the general intuition behind this rule.
383 // In the following code:
384 // store x 0;
385 // release action; [1]
386 // acquire action; [4]
387 // %val = load x;
388 // It is unsafe to replace %val by 0 because another thread may be running:
389 // acquire action; [2]
390 // store x 42;
391 // release action; [3]
392 // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
393 // being 42. A key property of this program however is that if either
394 // 1 or 4 were missing, there would be a race between the store of 42
395 // either the store of 0 or the load (making the whole program racy).
396 // The paper mentioned above shows that the same property is respected
397 // by every program that can detect any optimization of that kind: either
398 // it is racy (undefined) or there is a release followed by an acquire
399 // between the pair of accesses under consideration.
400
401 // If the load is invariant, we "know" that it doesn't alias *any* write. We
402 // do want to respect mustalias results since defs are useful for value
403 // forwarding, but any mayalias write can be assumed to be noalias.
404 // Arguably, this logic should be pushed inside AliasAnalysis itself.
405 if (isLoad && QueryInst) {
406 LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
407 if (LI && LI->hasMetadata(LLVMContext::MD_invariant_load))
408 isInvariantLoad = true;
409 }
410
411 // True for volatile instruction.
412 // For Load/Store return true if atomic ordering is stronger than AO,
413 // for other instruction just true if it can read or write to memory.
414 auto isComplexForReordering = [](Instruction * I, AtomicOrdering AO)->bool {
415 if (I->isVolatile())
416 return true;
417 if (auto *LI = dyn_cast<LoadInst>(I))
418 return isStrongerThan(LI->getOrdering(), AO);
419 if (auto *SI = dyn_cast<StoreInst>(I))
420 return isStrongerThan(SI->getOrdering(), AO);
421 return I->mayReadOrWriteMemory();
422 };
423
424 // Walk backwards through the basic block, looking for dependencies.
425 while (ScanIt != BB->begin()) {
426 Instruction *Inst = &*--ScanIt;
427
428 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
429 // Debug intrinsics don't (and can't) cause dependencies.
430 if (isa<DbgInfoIntrinsic>(II))
431 continue;
432
433 // Limit the amount of scanning we do so we don't end up with quadratic
434 // running time on extreme testcases.
435 --*Limit;
436 if (!*Limit)
438
439 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
440 // If we reach a lifetime begin or end marker, then the query ends here
441 // because the value is undefined.
442 Intrinsic::ID ID = II->getIntrinsicID();
443 switch (ID) {
444 case Intrinsic::lifetime_start: {
445 // FIXME: This only considers queries directly on the invariant-tagged
446 // pointer, not on query pointers that are indexed off of them. It'd
447 // be nice to handle that at some point (the right approach is to use
448 // GetPointerBaseWithConstantOffset).
449 MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1));
450 if (BatchAA.isMustAlias(ArgLoc, MemLoc))
451 return MemDepResult::getDef(II);
452 continue;
453 }
454 case Intrinsic::masked_load:
455 case Intrinsic::masked_store: {
456 MemoryLocation Loc;
457 /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
458 AliasResult R = BatchAA.alias(Loc, MemLoc);
459 if (R == AliasResult::NoAlias)
460 continue;
461 if (R == AliasResult::MustAlias)
462 return MemDepResult::getDef(II);
463 if (ID == Intrinsic::masked_load)
464 continue;
465 return MemDepResult::getClobber(II);
466 }
467 }
468 }
469
470 // Values depend on loads if the pointers are must aliased. This means
471 // that a load depends on another must aliased load from the same value.
472 // One exception is atomic loads: a value can depend on an atomic load that
473 // it does not alias with when this atomic load indicates that another
474 // thread may be accessing the location.
475 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
476 // While volatile access cannot be eliminated, they do not have to clobber
477 // non-aliasing locations, as normal accesses, for example, can be safely
478 // reordered with volatile accesses.
479 if (LI->isVolatile()) {
480 if (!QueryInst)
481 // Original QueryInst *may* be volatile
482 return MemDepResult::getClobber(LI);
483 if (QueryInst->isVolatile())
484 // Ordering required if QueryInst is itself volatile
485 return MemDepResult::getClobber(LI);
486 // Otherwise, volatile doesn't imply any special ordering
487 }
488
489 // Atomic loads have complications involved.
490 // A Monotonic (or higher) load is OK if the query inst is itself not
491 // atomic.
492 // FIXME: This is overly conservative.
493 if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
494 if (!QueryInst ||
495 isComplexForReordering(QueryInst, AtomicOrdering::NotAtomic))
496 return MemDepResult::getClobber(LI);
497 if (LI->getOrdering() != AtomicOrdering::Monotonic)
498 return MemDepResult::getClobber(LI);
499 }
500
502
503 // If we found a pointer, check if it could be the same as our pointer.
504 AliasResult R = BatchAA.alias(LoadLoc, MemLoc);
505
506 if (R == AliasResult::NoAlias)
507 continue;
508
509 if (isLoad) {
510 // Must aliased loads are defs of each other.
511 if (R == AliasResult::MustAlias)
512 return MemDepResult::getDef(Inst);
513
514 // If we have a partial alias, then return this as a clobber for the
515 // client to handle.
516 if (R == AliasResult::PartialAlias && R.hasOffset()) {
517 ClobberOffsets[LI] = R.getOffset();
518 return MemDepResult::getClobber(Inst);
519 }
520
521 // Random may-alias loads don't depend on each other without a
522 // dependence.
523 continue;
524 }
525
526 // Stores don't alias loads from read-only memory.
527 if (!isModSet(BatchAA.getModRefInfoMask(LoadLoc)))
528 continue;
529
530 // Stores depend on may/must aliased loads.
531 return MemDepResult::getDef(Inst);
532 }
533
534 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
535 // Atomic stores have complications involved.
536 // A Monotonic store is OK if the query inst is itself not atomic.
537 // FIXME: This is overly conservative.
538 if (!SI->isUnordered() && SI->isAtomic()) {
539 if (!QueryInst ||
540 isComplexForReordering(QueryInst, AtomicOrdering::Unordered))
542 // Ok, if we are here the guard above guarantee us that
543 // QueryInst is a non-atomic or unordered load/store.
544 // SI is atomic with monotonic or release semantic (seq_cst for store
545 // is actually a release semantic plus total order over other seq_cst
546 // instructions, as soon as QueryInst is not seq_cst we can consider it
547 // as simple release semantic).
548 // Monotonic and Release semantic allows re-ordering before store
549 // so we are safe to go further and check the aliasing. It will prohibit
550 // re-ordering in case locations are may or must alias.
551 }
552
553 // While volatile access cannot be eliminated, they do not have to clobber
554 // non-aliasing locations, as normal accesses can for example be reordered
555 // with volatile accesses.
556 if (SI->isVolatile())
557 if (!QueryInst || QueryInst->isVolatile())
559
560 // If alias analysis can tell that this store is guaranteed to not modify
561 // the query pointer, ignore it. Use getModRefInfo to handle cases where
562 // the query pointer points to constant memory etc.
563 if (!isModOrRefSet(BatchAA.getModRefInfo(SI, MemLoc)))
564 continue;
565
566 // Ok, this store might clobber the query pointer. Check to see if it is
567 // a must alias: in this case, we want to return this as a def.
568 // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
570
571 // If we found a pointer, check if it could be the same as our pointer.
572 AliasResult R = BatchAA.alias(StoreLoc, MemLoc);
573
574 if (R == AliasResult::NoAlias)
575 continue;
576 if (R == AliasResult::MustAlias)
577 return MemDepResult::getDef(Inst);
578 if (isInvariantLoad)
579 continue;
580 return MemDepResult::getClobber(Inst);
581 }
582
583 // If this is an allocation, and if we know that the accessed pointer is to
584 // the allocation, return Def. This means that there is no dependence and
585 // the access can be optimized based on that. For example, a load could
586 // turn into undef. Note that we can bypass the allocation itself when
587 // looking for a clobber in many cases; that's an alias property and is
588 // handled by BasicAA.
589 if (isa<AllocaInst>(Inst) || isNoAliasCall(Inst)) {
590 const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr);
591 if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr))
592 return MemDepResult::getDef(Inst);
593 }
594
595 // If we found a select instruction for MemLoc pointer, return it as Def
596 // dependency.
597 if (isa<SelectInst>(Inst) && MemLoc.Ptr == Inst)
598 return MemDepResult::getDef(Inst);
599
600 if (isInvariantLoad)
601 continue;
602
603 // A release fence requires that all stores complete before it, but does
604 // not prevent the reordering of following loads or stores 'before' the
605 // fence. As a result, we look past it when finding a dependency for
606 // loads. DSE uses this to find preceding stores to delete and thus we
607 // can't bypass the fence if the query instruction is a store.
608 if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
609 if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
610 continue;
611
612 // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
613 ModRefInfo MR = BatchAA.getModRefInfo(Inst, MemLoc);
614 // If necessary, perform additional analysis.
615 if (isModAndRefSet(MR))
616 MR = BatchAA.callCapturesBefore(Inst, MemLoc, &DT);
617 switch (MR) {
619 // If the call has no effect on the queried pointer, just ignore it.
620 continue;
621 case ModRefInfo::Mod:
622 return MemDepResult::getClobber(Inst);
623 case ModRefInfo::Ref:
624 // If the call is known to never store to the pointer, and if this is a
625 // load query, we can safely ignore it (scan past it).
626 if (isLoad)
627 continue;
628 [[fallthrough]];
629 default:
630 // Otherwise, there is a potential dependence. Return a clobber.
631 return MemDepResult::getClobber(Inst);
632 }
633 }
634
635 // No dependence found. If this is the entry block of the function, it is
636 // unknown, otherwise it is non-local.
637 if (BB != &BB->getParent()->getEntryBlock())
640}
641
643 ClobberOffsets.clear();
644 Instruction *ScanPos = QueryInst;
645
646 // Check for a cached result
647 MemDepResult &LocalCache = LocalDeps[QueryInst];
648
649 // If the cached entry is non-dirty, just return it. Note that this depends
650 // on MemDepResult's default constructing to 'dirty'.
651 if (!LocalCache.isDirty())
652 return LocalCache;
653
654 // Otherwise, if we have a dirty entry, we know we can start the scan at that
655 // instruction, which may save us some work.
656 if (Instruction *Inst = LocalCache.getInst()) {
657 ScanPos = Inst;
658
659 RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
660 }
661
662 BasicBlock *QueryParent = QueryInst->getParent();
663
664 // Do the scan.
665 if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
666 // No dependence found. If this is the entry block of the function, it is
667 // unknown, otherwise it is non-local.
668 if (QueryParent != &QueryParent->getParent()->getEntryBlock())
669 LocalCache = MemDepResult::getNonLocal();
670 else
671 LocalCache = MemDepResult::getNonFuncLocal();
672 } else {
673 MemoryLocation MemLoc;
674 ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
675 if (MemLoc.Ptr) {
676 // If we can do a pointer scan, make it happen.
677 bool isLoad = !isModSet(MR);
678 if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
679 isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
680
681 LocalCache =
682 getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
683 QueryParent, QueryInst, nullptr);
684 } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
685 bool isReadOnly = AA.onlyReadsMemory(QueryCall);
686 LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
687 ScanPos->getIterator(), QueryParent);
688 } else
689 // Non-memory instruction.
690 LocalCache = MemDepResult::getUnknown();
691 }
692
693 // Remember the result!
694 if (Instruction *I = LocalCache.getInst())
695 ReverseLocalDeps[I].insert(QueryInst);
696
697 return LocalCache;
698}
699
700#ifndef NDEBUG
701/// This method is used when -debug is specified to verify that cache arrays
702/// are properly kept sorted.
704 int Count = -1) {
705 if (Count == -1)
706 Count = Cache.size();
707 assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
708 "Cache isn't sorted!");
709}
710#endif
711
714 assert(getDependency(QueryCall).isNonLocal() &&
715 "getNonLocalCallDependency should only be used on calls with "
716 "non-local deps!");
717 PerInstNLInfo &CacheP = NonLocalDepsMap[QueryCall];
718 NonLocalDepInfo &Cache = CacheP.first;
719
720 // This is the set of blocks that need to be recomputed. In the cached case,
721 // this can happen due to instructions being deleted etc. In the uncached
722 // case, this starts out as the set of predecessors we care about.
724
725 if (!Cache.empty()) {
726 // Okay, we have a cache entry. If we know it is not dirty, just return it
727 // with no computation.
728 if (!CacheP.second) {
729 ++NumCacheNonLocal;
730 return Cache;
731 }
732
733 // If we already have a partially computed set of results, scan them to
734 // determine what is dirty, seeding our initial DirtyBlocks worklist.
735 for (auto &Entry : Cache)
736 if (Entry.getResult().isDirty())
737 DirtyBlocks.push_back(Entry.getBB());
738
739 // Sort the cache so that we can do fast binary search lookups below.
740 llvm::sort(Cache);
741
742 ++NumCacheDirtyNonLocal;
743 } else {
744 // Seed DirtyBlocks with each of the preds of QueryInst's block.
745 BasicBlock *QueryBB = QueryCall->getParent();
746 append_range(DirtyBlocks, PredCache.get(QueryBB));
747 ++NumUncacheNonLocal;
748 }
749
750 // isReadonlyCall - If this is a read-only call, we can be more aggressive.
751 bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);
752
754
755 unsigned NumSortedEntries = Cache.size();
756 LLVM_DEBUG(AssertSorted(Cache));
757
758 // Iterate while we still have blocks to update.
759 while (!DirtyBlocks.empty()) {
760 BasicBlock *DirtyBB = DirtyBlocks.pop_back_val();
761
762 // Already processed this block?
763 if (!Visited.insert(DirtyBB).second)
764 continue;
765
766 // Do a binary search to see if we already have an entry for this block in
767 // the cache set. If so, find it.
768 LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
769 NonLocalDepInfo::iterator Entry =
770 std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
771 NonLocalDepEntry(DirtyBB));
772 if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
773 --Entry;
774
775 NonLocalDepEntry *ExistingResult = nullptr;
776 if (Entry != Cache.begin() + NumSortedEntries &&
777 Entry->getBB() == DirtyBB) {
778 // If we already have an entry, and if it isn't already dirty, the block
779 // is done.
780 if (!Entry->getResult().isDirty())
781 continue;
782
783 // Otherwise, remember this slot so we can update the value.
784 ExistingResult = &*Entry;
785 }
786
787 // If the dirty entry has a pointer, start scanning from it so we don't have
788 // to rescan the entire block.
789 BasicBlock::iterator ScanPos = DirtyBB->end();
790 if (ExistingResult) {
791 if (Instruction *Inst = ExistingResult->getResult().getInst()) {
792 ScanPos = Inst->getIterator();
793 // We're removing QueryInst's use of Inst.
794 RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
795 QueryCall);
796 }
797 }
798
799 // Find out if this block has a local dependency for QueryInst.
800 MemDepResult Dep;
801
802 if (ScanPos != DirtyBB->begin()) {
803 Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
804 } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
805 // No dependence found. If this is the entry block of the function, it is
806 // a clobber, otherwise it is unknown.
808 } else {
810 }
811
812 // If we had a dirty entry for the block, update it. Otherwise, just add
813 // a new entry.
814 if (ExistingResult)
815 ExistingResult->setResult(Dep);
816 else
817 Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
818
819 // If the block has a dependency (i.e. it isn't completely transparent to
820 // the value), remember the association!
821 if (!Dep.isNonLocal()) {
822 // Keep the ReverseNonLocalDeps map up to date so we can efficiently
823 // update this when we remove instructions.
824 if (Instruction *Inst = Dep.getInst())
825 ReverseNonLocalDeps[Inst].insert(QueryCall);
826 } else {
827
828 // If the block *is* completely transparent to the load, we need to check
829 // the predecessors of this block. Add them to our worklist.
830 append_range(DirtyBlocks, PredCache.get(DirtyBB));
831 }
832 }
833
834 return Cache;
835}
836
839 const MemoryLocation Loc = MemoryLocation::get(QueryInst);
840 bool isLoad = isa<LoadInst>(QueryInst);
841 BasicBlock *FromBB = QueryInst->getParent();
842 assert(FromBB);
843
844 assert(Loc.Ptr->getType()->isPointerTy() &&
845 "Can't get pointer deps of a non-pointer!");
846 Result.clear();
847 {
848 // Check if there is cached Def with invariant.group.
849 auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
850 if (NonLocalDefIt != NonLocalDefsCache.end()) {
851 Result.push_back(NonLocalDefIt->second);
852 ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
853 .erase(QueryInst);
854 NonLocalDefsCache.erase(NonLocalDefIt);
855 return;
856 }
857 }
858 // This routine does not expect to deal with volatile instructions.
859 // Doing so would require piping through the QueryInst all the way through.
860 // TODO: volatiles can't be elided, but they can be reordered with other
861 // non-volatile accesses.
862
863 // We currently give up on any instruction which is ordered, but we do handle
864 // atomic instructions which are unordered.
865 // TODO: Handle ordered instructions
866 auto isOrdered = [](Instruction *Inst) {
867 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
868 return !LI->isUnordered();
869 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
870 return !SI->isUnordered();
871 }
872 return false;
873 };
874 if (QueryInst->isVolatile() || isOrdered(QueryInst)) {
875 Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
876 const_cast<Value *>(Loc.Ptr)));
877 return;
878 }
879 const DataLayout &DL = FromBB->getModule()->getDataLayout();
880 PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
881
882 // This is the set of blocks we've inspected, and the pointer we consider in
883 // each block. Because of critical edges, we currently bail out if querying
884 // a block with multiple different pointers. This can happen during PHI
885 // translation.
887 if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
888 Result, Visited, true))
889 return;
890 Result.clear();
891 Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
892 const_cast<Value *>(Loc.Ptr)));
893}
894
895/// Compute the memdep value for BB with Pointer/PointeeSize using either
896/// cached information in Cache or by doing a lookup (which may use dirty cache
897/// info if available).
898///
899/// If we do a lookup, add the result to the cache.
900MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
901 Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
902 BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries,
903 BatchAAResults &BatchAA) {
904
905 bool isInvariantLoad = false;
906
907 if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
908 isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
909
910 // Do a binary search to see if we already have an entry for this block in
911 // the cache set. If so, find it.
912 NonLocalDepInfo::iterator Entry = std::upper_bound(
913 Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
914 if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
915 --Entry;
916
917 NonLocalDepEntry *ExistingResult = nullptr;
918 if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
919 ExistingResult = &*Entry;
920
921 // Use cached result for invariant load only if there is no dependency for non
922 // invariant load. In this case invariant load can not have any dependency as
923 // well.
924 if (ExistingResult && isInvariantLoad &&
925 !ExistingResult->getResult().isNonFuncLocal())
926 ExistingResult = nullptr;
927
928 // If we have a cached entry, and it is non-dirty, use it as the value for
929 // this dependency.
930 if (ExistingResult && !ExistingResult->getResult().isDirty()) {
931 ++NumCacheNonLocalPtr;
932 return ExistingResult->getResult();
933 }
934
935 // Otherwise, we have to scan for the value. If we have a dirty cache
936 // entry, start scanning from its position, otherwise we scan from the end
937 // of the block.
938 BasicBlock::iterator ScanPos = BB->end();
939 if (ExistingResult && ExistingResult->getResult().getInst()) {
940 assert(ExistingResult->getResult().getInst()->getParent() == BB &&
941 "Instruction invalidated?");
942 ++NumCacheDirtyNonLocalPtr;
943 ScanPos = ExistingResult->getResult().getInst()->getIterator();
944
945 // Eliminating the dirty entry from 'Cache', so update the reverse info.
946 ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
947 RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
948 } else {
949 ++NumUncacheNonLocalPtr;
950 }
951
952 // Scan the block for the dependency.
953 MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
954 QueryInst, nullptr, BatchAA);
955
956 // Don't cache results for invariant load.
957 if (isInvariantLoad)
958 return Dep;
959
960 // If we had a dirty entry for the block, update it. Otherwise, just add
961 // a new entry.
962 if (ExistingResult)
963 ExistingResult->setResult(Dep);
964 else
965 Cache->push_back(NonLocalDepEntry(BB, Dep));
966
967 // If the block has a dependency (i.e. it isn't completely transparent to
968 // the value), remember the reverse association because we just added it
969 // to Cache!
970 if (!Dep.isLocal())
971 return Dep;
972
973 // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
974 // update MemDep when we remove instructions.
975 Instruction *Inst = Dep.getInst();
976 assert(Inst && "Didn't depend on anything?");
977 ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
978 ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
979 return Dep;
980}
981
982/// Sort the NonLocalDepInfo cache, given a certain number of elements in the
983/// array that are already properly ordered.
984///
985/// This is optimized for the case when only a few entries are added.
986static void
988 unsigned NumSortedEntries) {
989 switch (Cache.size() - NumSortedEntries) {
990 case 0:
991 // done, no new entries.
992 break;
993 case 2: {
994 // Two new entries, insert the last one into place.
995 NonLocalDepEntry Val = Cache.back();
996 Cache.pop_back();
997 MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
998 std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
999 Cache.insert(Entry, Val);
1000 [[fallthrough]];
1001 }
1002 case 1:
1003 // One new entry, Just insert the new value at the appropriate position.
1004 if (Cache.size() != 1) {
1005 NonLocalDepEntry Val = Cache.back();
1006 Cache.pop_back();
1007 MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1008 llvm::upper_bound(Cache, Val);
1009 Cache.insert(Entry, Val);
1010 }
1011 break;
1012 default:
1013 // Added many values, do a full scale sort.
1014 llvm::sort(Cache);
1015 break;
1016 }
1017}
1018
1019/// Perform a dependency query based on pointer/pointeesize starting at the end
1020/// of StartBB.
1021///
1022/// Add any clobber/def results to the results vector and keep track of which
1023/// blocks are visited in 'Visited'.
1024///
1025/// This has special behavior for the first block queries (when SkipFirstBlock
1026/// is true). In this special case, it ignores the contents of the specified
1027/// block and starts returning dependence info for its predecessors.
1028///
1029/// This function returns true on success, or false to indicate that it could
1030/// not compute dependence information for some reason. This should be treated
1031/// as a clobber dependence on the first instruction in the predecessor block.
1032bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1033 Instruction *QueryInst, const PHITransAddr &Pointer,
1034 const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
1036 DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock,
1037 bool IsIncomplete) {
1038 // Look up the cached info for Pointer.
1039 ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1040
1041 // Set up a temporary NLPI value. If the map doesn't yet have an entry for
1042 // CacheKey, this value will be inserted as the associated value. Otherwise,
1043 // it'll be ignored, and we'll have to check to see if the cached size and
1044 // aa tags are consistent with the current query.
1045 NonLocalPointerInfo InitialNLPI;
1046 InitialNLPI.Size = Loc.Size;
1047 InitialNLPI.AATags = Loc.AATags;
1048
1049 bool isInvariantLoad = false;
1050 if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
1051 isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
1052
1053 // Get the NLPI for CacheKey, inserting one into the map if it doesn't
1054 // already have one.
1055 std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1056 NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
1057 NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1058
1059 // If we already have a cache entry for this CacheKey, we may need to do some
1060 // work to reconcile the cache entry and the current query.
1061 // Invariant loads don't participate in caching. Thus no need to reconcile.
1062 if (!isInvariantLoad && !Pair.second) {
1063 if (CacheInfo->Size != Loc.Size) {
1064 bool ThrowOutEverything;
1065 if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
1066 // FIXME: We may be able to do better in the face of results with mixed
1067 // precision. We don't appear to get them in practice, though, so just
1068 // be conservative.
1069 ThrowOutEverything =
1070 CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
1071 CacheInfo->Size.getValue() < Loc.Size.getValue();
1072 } else {
1073 // For our purposes, unknown size > all others.
1074 ThrowOutEverything = !Loc.Size.hasValue();
1075 }
1076
1077 if (ThrowOutEverything) {
1078 // The query's Size is greater than the cached one. Throw out the
1079 // cached data and proceed with the query at the greater size.
1080 CacheInfo->Pair = BBSkipFirstBlockPair();
1081 CacheInfo->Size = Loc.Size;
1082 for (auto &Entry : CacheInfo->NonLocalDeps)
1083 if (Instruction *Inst = Entry.getResult().getInst())
1084 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1085 CacheInfo->NonLocalDeps.clear();
1086 // The cache is cleared (in the above line) so we will have lost
1087 // information about blocks we have already visited. We therefore must
1088 // assume that the cache information is incomplete.
1089 IsIncomplete = true;
1090 } else {
1091 // This query's Size is less than the cached one. Conservatively restart
1092 // the query using the greater size.
1093 return getNonLocalPointerDepFromBB(
1094 QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
1095 StartBB, Result, Visited, SkipFirstBlock, IsIncomplete);
1096 }
1097 }
1098
1099 // If the query's AATags are inconsistent with the cached one,
1100 // conservatively throw out the cached data and restart the query with
1101 // no tag if needed.
1102 if (CacheInfo->AATags != Loc.AATags) {
1103 if (CacheInfo->AATags) {
1104 CacheInfo->Pair = BBSkipFirstBlockPair();
1105 CacheInfo->AATags = AAMDNodes();
1106 for (auto &Entry : CacheInfo->NonLocalDeps)
1107 if (Instruction *Inst = Entry.getResult().getInst())
1108 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1109 CacheInfo->NonLocalDeps.clear();
1110 // The cache is cleared (in the above line) so we will have lost
1111 // information about blocks we have already visited. We therefore must
1112 // assume that the cache information is incomplete.
1113 IsIncomplete = true;
1114 }
1115 if (Loc.AATags)
1116 return getNonLocalPointerDepFromBB(
1117 QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
1118 Visited, SkipFirstBlock, IsIncomplete);
1119 }
1120 }
1121
1122 NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1123
1124 // If we have valid cached information for exactly the block we are
1125 // investigating, just return it with no recomputation.
1126 // Don't use cached information for invariant loads since it is valid for
1127 // non-invariant loads only.
1128 if (!IsIncomplete && !isInvariantLoad &&
1129 CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1130 // We have a fully cached result for this query then we can just return the
1131 // cached results and populate the visited set. However, we have to verify
1132 // that we don't already have conflicting results for these blocks. Check
1133 // to ensure that if a block in the results set is in the visited set that
1134 // it was for the same pointer query.
1135 if (!Visited.empty()) {
1136 for (auto &Entry : *Cache) {
1138 Visited.find(Entry.getBB());
1139 if (VI == Visited.end() || VI->second == Pointer.getAddr())
1140 continue;
1141
1142 // We have a pointer mismatch in a block. Just return false, saying
1143 // that something was clobbered in this result. We could also do a
1144 // non-fully cached query, but there is little point in doing this.
1145 return false;
1146 }
1147 }
1148
1149 Value *Addr = Pointer.getAddr();
1150 for (auto &Entry : *Cache) {
1151 Visited.insert(std::make_pair(Entry.getBB(), Addr));
1152 if (Entry.getResult().isNonLocal()) {
1153 continue;
1154 }
1155
1156 if (DT.isReachableFromEntry(Entry.getBB())) {
1157 Result.push_back(
1158 NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
1159 }
1160 }
1161 ++NumCacheCompleteNonLocalPtr;
1162 return true;
1163 }
1164
1165 // Otherwise, either this is a new block, a block with an invalid cache
1166 // pointer or one that we're about to invalidate by putting more info into
1167 // it than its valid cache info. If empty and not explicitly indicated as
1168 // incomplete, the result will be valid cache info, otherwise it isn't.
1169 //
1170 // Invariant loads don't affect cache in any way thus no need to update
1171 // CacheInfo as well.
1172 if (!isInvariantLoad) {
1173 if (!IsIncomplete && Cache->empty())
1174 CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1175 else
1176 CacheInfo->Pair = BBSkipFirstBlockPair();
1177 }
1178
1180 Worklist.push_back(StartBB);
1181
1182 // PredList used inside loop.
1184
1185 // Keep track of the entries that we know are sorted. Previously cached
1186 // entries will all be sorted. The entries we add we only sort on demand (we
1187 // don't insert every element into its sorted position). We know that we
1188 // won't get any reuse from currently inserted values, because we don't
1189 // revisit blocks after we insert info for them.
1190 unsigned NumSortedEntries = Cache->size();
1191 unsigned WorklistEntries = BlockNumberLimit;
1192 bool GotWorklistLimit = false;
1193 LLVM_DEBUG(AssertSorted(*Cache));
1194
1195 BatchAAResults BatchAA(AA);
1196 while (!Worklist.empty()) {
1197 BasicBlock *BB = Worklist.pop_back_val();
1198
1199 // If we do process a large number of blocks it becomes very expensive and
1200 // likely it isn't worth worrying about
1201 if (Result.size() > NumResultsLimit) {
1202 // Sort it now (if needed) so that recursive invocations of
1203 // getNonLocalPointerDepFromBB and other routines that could reuse the
1204 // cache value will only see properly sorted cache arrays.
1205 if (Cache && NumSortedEntries != Cache->size()) {
1206 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1207 }
1208 // Since we bail out, the "Cache" set won't contain all of the
1209 // results for the query. This is ok (we can still use it to accelerate
1210 // specific block queries) but we can't do the fastpath "return all
1211 // results from the set". Clear out the indicator for this.
1212 CacheInfo->Pair = BBSkipFirstBlockPair();
1213 return false;
1214 }
1215
1216 // Skip the first block if we have it.
1217 if (!SkipFirstBlock) {
1218 // Analyze the dependency of *Pointer in FromBB. See if we already have
1219 // been here.
1220 assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
1221
1222 // Get the dependency info for Pointer in BB. If we have cached
1223 // information, we will use it, otherwise we compute it.
1224 LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
1225 MemDepResult Dep = getNonLocalInfoForBlock(
1226 QueryInst, Loc, isLoad, BB, Cache, NumSortedEntries, BatchAA);
1227
1228 // If we got a Def or Clobber, add this to the list of results.
1229 if (!Dep.isNonLocal()) {
1230 if (DT.isReachableFromEntry(BB)) {
1231 Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
1232 continue;
1233 }
1234 }
1235 }
1236
1237 // If 'Pointer' is an instruction defined in this block, then we need to do
1238 // phi translation to change it into a value live in the predecessor block.
1239 // If not, we just add the predecessors to the worklist and scan them with
1240 // the same Pointer.
1241 if (!Pointer.needsPHITranslationFromBlock(BB)) {
1242 SkipFirstBlock = false;
1244 for (BasicBlock *Pred : PredCache.get(BB)) {
1245 // Verify that we haven't looked at this block yet.
1246 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1247 Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
1248 if (InsertRes.second) {
1249 // First time we've looked at *PI.
1250 NewBlocks.push_back(Pred);
1251 continue;
1252 }
1253
1254 // If we have seen this block before, but it was with a different
1255 // pointer then we have a phi translation failure and we have to treat
1256 // this as a clobber.
1257 if (InsertRes.first->second != Pointer.getAddr()) {
1258 // Make sure to clean up the Visited map before continuing on to
1259 // PredTranslationFailure.
1260 for (unsigned i = 0; i < NewBlocks.size(); i++)
1261 Visited.erase(NewBlocks[i]);
1262 goto PredTranslationFailure;
1263 }
1264 }
1265 if (NewBlocks.size() > WorklistEntries) {
1266 // Make sure to clean up the Visited map before continuing on to
1267 // PredTranslationFailure.
1268 for (unsigned i = 0; i < NewBlocks.size(); i++)
1269 Visited.erase(NewBlocks[i]);
1270 GotWorklistLimit = true;
1271 goto PredTranslationFailure;
1272 }
1273 WorklistEntries -= NewBlocks.size();
1274 Worklist.append(NewBlocks.begin(), NewBlocks.end());
1275 continue;
1276 }
1277
1278 // We do need to do phi translation, if we know ahead of time we can't phi
1279 // translate this value, don't even try.
1280 if (!Pointer.isPotentiallyPHITranslatable())
1281 goto PredTranslationFailure;
1282
1283 // We may have added values to the cache list before this PHI translation.
1284 // If so, we haven't done anything to ensure that the cache remains sorted.
1285 // Sort it now (if needed) so that recursive invocations of
1286 // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1287 // value will only see properly sorted cache arrays.
1288 if (Cache && NumSortedEntries != Cache->size()) {
1289 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1290 NumSortedEntries = Cache->size();
1291 }
1292 Cache = nullptr;
1293
1294 PredList.clear();
1295 for (BasicBlock *Pred : PredCache.get(BB)) {
1296 PredList.push_back(std::make_pair(Pred, Pointer));
1297
1298 // Get the PHI translated pointer in this predecessor. This can fail if
1299 // not translatable, in which case the getAddr() returns null.
1300 PHITransAddr &PredPointer = PredList.back().second;
1301 Value *PredPtrVal =
1302 PredPointer.translateValue(BB, Pred, &DT, /*MustDominate=*/false);
1303
1304 // Check to see if we have already visited this pred block with another
1305 // pointer. If so, we can't do this lookup. This failure can occur
1306 // with PHI translation when a critical edge exists and the PHI node in
1307 // the successor translates to a pointer value different than the
1308 // pointer the block was first analyzed with.
1309 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1310 Visited.insert(std::make_pair(Pred, PredPtrVal));
1311
1312 if (!InsertRes.second) {
1313 // We found the pred; take it off the list of preds to visit.
1314 PredList.pop_back();
1315
1316 // If the predecessor was visited with PredPtr, then we already did
1317 // the analysis and can ignore it.
1318 if (InsertRes.first->second == PredPtrVal)
1319 continue;
1320
1321 // Otherwise, the block was previously analyzed with a different
1322 // pointer. We can't represent the result of this case, so we just
1323 // treat this as a phi translation failure.
1324
1325 // Make sure to clean up the Visited map before continuing on to
1326 // PredTranslationFailure.
1327 for (unsigned i = 0, n = PredList.size(); i < n; ++i)
1328 Visited.erase(PredList[i].first);
1329
1330 goto PredTranslationFailure;
1331 }
1332 }
1333
1334 // Actually process results here; this need to be a separate loop to avoid
1335 // calling getNonLocalPointerDepFromBB for blocks we don't want to return
1336 // any results for. (getNonLocalPointerDepFromBB will modify our
1337 // datastructures in ways the code after the PredTranslationFailure label
1338 // doesn't expect.)
1339 for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
1340 BasicBlock *Pred = PredList[i].first;
1341 PHITransAddr &PredPointer = PredList[i].second;
1342 Value *PredPtrVal = PredPointer.getAddr();
1343
1344 bool CanTranslate = true;
1345 // If PHI translation was unable to find an available pointer in this
1346 // predecessor, then we have to assume that the pointer is clobbered in
1347 // that predecessor. We can still do PRE of the load, which would insert
1348 // a computation of the pointer in this predecessor.
1349 if (!PredPtrVal)
1350 CanTranslate = false;
1351
1352 // FIXME: it is entirely possible that PHI translating will end up with
1353 // the same value. Consider PHI translating something like:
1354 // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
1355 // to recurse here, pedantically speaking.
1356
1357 // If getNonLocalPointerDepFromBB fails here, that means the cached
1358 // result conflicted with the Visited list; we have to conservatively
1359 // assume it is unknown, but this also does not block PRE of the load.
1360 if (!CanTranslate ||
1361 !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1362 Loc.getWithNewPtr(PredPtrVal), isLoad,
1363 Pred, Result, Visited)) {
1364 // Add the entry to the Result list.
1365 NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
1366 Result.push_back(Entry);
1367
1368 // Since we had a phi translation failure, the cache for CacheKey won't
1369 // include all of the entries that we need to immediately satisfy future
1370 // queries. Mark this in NonLocalPointerDeps by setting the
1371 // BBSkipFirstBlockPair pointer to null. This requires reuse of the
1372 // cached value to do more work but not miss the phi trans failure.
1373 NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1374 NLPI.Pair = BBSkipFirstBlockPair();
1375 continue;
1376 }
1377 }
1378
1379 // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1380 CacheInfo = &NonLocalPointerDeps[CacheKey];
1381 Cache = &CacheInfo->NonLocalDeps;
1382 NumSortedEntries = Cache->size();
1383
1384 // Since we did phi translation, the "Cache" set won't contain all of the
1385 // results for the query. This is ok (we can still use it to accelerate
1386 // specific block queries) but we can't do the fastpath "return all
1387 // results from the set" Clear out the indicator for this.
1388 CacheInfo->Pair = BBSkipFirstBlockPair();
1389 SkipFirstBlock = false;
1390 continue;
1391
1392 PredTranslationFailure:
1393 // The following code is "failure"; we can't produce a sane translation
1394 // for the given block. It assumes that we haven't modified any of
1395 // our datastructures while processing the current block.
1396
1397 if (!Cache) {
1398 // Refresh the CacheInfo/Cache pointer if it got invalidated.
1399 CacheInfo = &NonLocalPointerDeps[CacheKey];
1400 Cache = &CacheInfo->NonLocalDeps;
1401 NumSortedEntries = Cache->size();
1402 }
1403
1404 // Since we failed phi translation, the "Cache" set won't contain all of the
1405 // results for the query. This is ok (we can still use it to accelerate
1406 // specific block queries) but we can't do the fastpath "return all
1407 // results from the set". Clear out the indicator for this.
1408 CacheInfo->Pair = BBSkipFirstBlockPair();
1409
1410 // If *nothing* works, mark the pointer as unknown.
1411 //
1412 // If this is the magic first block, return this as a clobber of the whole
1413 // incoming value. Since we can't phi translate to one of the predecessors,
1414 // we have to bail out.
1415 if (SkipFirstBlock)
1416 return false;
1417
1418 // Results of invariant loads are not cached thus no need to update cached
1419 // information.
1420 if (!isInvariantLoad) {
1421 for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
1422 if (I.getBB() != BB)
1423 continue;
1424
1425 assert((GotWorklistLimit || I.getResult().isNonLocal() ||
1426 !DT.isReachableFromEntry(BB)) &&
1427 "Should only be here with transparent block");
1428
1429 I.setResult(MemDepResult::getUnknown());
1430
1431
1432 break;
1433 }
1434 }
1435 (void)GotWorklistLimit;
1436 // Go ahead and report unknown dependence.
1437 Result.push_back(
1439 }
1440
1441 // Okay, we're done now. If we added new values to the cache, re-sort it.
1442 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1443 LLVM_DEBUG(AssertSorted(*Cache));
1444 return true;
1445}
1446
1447/// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
1448void MemoryDependenceResults::removeCachedNonLocalPointerDependencies(
1449 ValueIsLoadPair P) {
1450
1451 // Most of the time this cache is empty.
1452 if (!NonLocalDefsCache.empty()) {
1453 auto it = NonLocalDefsCache.find(P.getPointer());
1454 if (it != NonLocalDefsCache.end()) {
1455 RemoveFromReverseMap(ReverseNonLocalDefsCache,
1456 it->second.getResult().getInst(), P.getPointer());
1457 NonLocalDefsCache.erase(it);
1458 }
1459
1460 if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
1461 auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
1462 if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
1463 for (const auto *entry : toRemoveIt->second)
1464 NonLocalDefsCache.erase(entry);
1465 ReverseNonLocalDefsCache.erase(toRemoveIt);
1466 }
1467 }
1468 }
1469
1470 CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
1471 if (It == NonLocalPointerDeps.end())
1472 return;
1473
1474 // Remove all of the entries in the BB->val map. This involves removing
1475 // instructions from the reverse map.
1476 NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
1477
1478 for (const NonLocalDepEntry &DE : PInfo) {
1479 Instruction *Target = DE.getResult().getInst();
1480 if (!Target)
1481 continue; // Ignore non-local dep results.
1482 assert(Target->getParent() == DE.getBB());
1483
1484 // Eliminating the dirty entry from 'Cache', so update the reverse info.
1485 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1486 }
1487
1488 // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1489 NonLocalPointerDeps.erase(It);
1490}
1491
1493 // If Ptr isn't really a pointer, just ignore it.
1494 if (!Ptr->getType()->isPointerTy())
1495 return;
1496 // Flush store info for the pointer.
1497 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1498 // Flush load info for the pointer.
1499 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1500}
1501
1503 PredCache.clear();
1504}
1505
1507 // Walk through the Non-local dependencies, removing this one as the value
1508 // for any cached queries.
1509 NonLocalDepMapType::iterator NLDI = NonLocalDepsMap.find(RemInst);
1510 if (NLDI != NonLocalDepsMap.end()) {
1511 NonLocalDepInfo &BlockMap = NLDI->second.first;
1512 for (auto &Entry : BlockMap)
1513 if (Instruction *Inst = Entry.getResult().getInst())
1514 RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1515 NonLocalDepsMap.erase(NLDI);
1516 }
1517
1518 // If we have a cached local dependence query for this instruction, remove it.
1519 LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1520 if (LocalDepEntry != LocalDeps.end()) {
1521 // Remove us from DepInst's reverse set now that the local dep info is gone.
1522 if (Instruction *Inst = LocalDepEntry->second.getInst())
1523 RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1524
1525 // Remove this local dependency info.
1526 LocalDeps.erase(LocalDepEntry);
1527 }
1528
1529 // If we have any cached dependencies on this instruction, remove
1530 // them.
1531
1532 // If the instruction is a pointer, remove it from both the load info and the
1533 // store info.
1534 if (RemInst->getType()->isPointerTy()) {
1535 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1536 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1537 } else {
1538 // Otherwise, if the instructions is in the map directly, it must be a load.
1539 // Remove it.
1540 auto toRemoveIt = NonLocalDefsCache.find(RemInst);
1541 if (toRemoveIt != NonLocalDefsCache.end()) {
1542 assert(isa<LoadInst>(RemInst) &&
1543 "only load instructions should be added directly");
1544 const Instruction *DepV = toRemoveIt->second.getResult().getInst();
1545 ReverseNonLocalDefsCache.find(DepV)->second.erase(RemInst);
1546 NonLocalDefsCache.erase(toRemoveIt);
1547 }
1548 }
1549
1550 // Loop over all of the things that depend on the instruction we're removing.
1552
1553 // If we find RemInst as a clobber or Def in any of the maps for other values,
1554 // we need to replace its entry with a dirty version of the instruction after
1555 // it. If RemInst is a terminator, we use a null dirty value.
1556 //
1557 // Using a dirty version of the instruction after RemInst saves having to scan
1558 // the entire block to get to this point.
1559 MemDepResult NewDirtyVal;
1560 if (!RemInst->isTerminator())
1561 NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
1562
1563 ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1564 if (ReverseDepIt != ReverseLocalDeps.end()) {
1565 // RemInst can't be the terminator if it has local stuff depending on it.
1566 assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
1567 "Nothing can locally depend on a terminator");
1568
1569 for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
1570 assert(InstDependingOnRemInst != RemInst &&
1571 "Already removed our local dep info");
1572
1573 LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1574
1575 // Make sure to remember that new things depend on NewDepInst.
1576 assert(NewDirtyVal.getInst() &&
1577 "There is no way something else can have "
1578 "a local dep on this if it is a terminator!");
1579 ReverseDepsToAdd.push_back(
1580 std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
1581 }
1582
1583 ReverseLocalDeps.erase(ReverseDepIt);
1584
1585 // Add new reverse deps after scanning the set, to avoid invalidating the
1586 // 'ReverseDeps' reference.
1587 while (!ReverseDepsToAdd.empty()) {
1588 ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
1589 ReverseDepsToAdd.back().second);
1590 ReverseDepsToAdd.pop_back();
1591 }
1592 }
1593
1594 ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1595 if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1596 for (Instruction *I : ReverseDepIt->second) {
1597 assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1598
1599 PerInstNLInfo &INLD = NonLocalDepsMap[I];
1600 // The information is now dirty!
1601 INLD.second = true;
1602
1603 for (auto &Entry : INLD.first) {
1604 if (Entry.getResult().getInst() != RemInst)
1605 continue;
1606
1607 // Convert to a dirty entry for the subsequent instruction.
1608 Entry.setResult(NewDirtyVal);
1609
1610 if (Instruction *NextI = NewDirtyVal.getInst())
1611 ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
1612 }
1613 }
1614
1615 ReverseNonLocalDeps.erase(ReverseDepIt);
1616
1617 // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
1618 while (!ReverseDepsToAdd.empty()) {
1619 ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
1620 ReverseDepsToAdd.back().second);
1621 ReverseDepsToAdd.pop_back();
1622 }
1623 }
1624
1625 // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1626 // value in the NonLocalPointerDeps info.
1627 ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1628 ReverseNonLocalPtrDeps.find(RemInst);
1629 if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1631 ReversePtrDepsToAdd;
1632
1633 for (ValueIsLoadPair P : ReversePtrDepIt->second) {
1634 assert(P.getPointer() != RemInst &&
1635 "Already removed NonLocalPointerDeps info for RemInst");
1636
1637 NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
1638
1639 // The cache is not valid for any specific block anymore.
1640 NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
1641
1642 // Update any entries for RemInst to use the instruction after it.
1643 for (auto &Entry : NLPDI) {
1644 if (Entry.getResult().getInst() != RemInst)
1645 continue;
1646
1647 // Convert to a dirty entry for the subsequent instruction.
1648 Entry.setResult(NewDirtyVal);
1649
1650 if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1651 ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1652 }
1653
1654 // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
1655 // subsequent value may invalidate the sortedness.
1656 llvm::sort(NLPDI);
1657 }
1658
1659 ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1660
1661 while (!ReversePtrDepsToAdd.empty()) {
1662 ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
1663 ReversePtrDepsToAdd.back().second);
1664 ReversePtrDepsToAdd.pop_back();
1665 }
1666 }
1667
1668 assert(!NonLocalDepsMap.count(RemInst) && "RemInst got reinserted?");
1669 LLVM_DEBUG(verifyRemoved(RemInst));
1670}
1671
1672/// Verify that the specified instruction does not occur in our internal data
1673/// structures.
1674///
1675/// This function verifies by asserting in debug builds.
1676void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
1677#ifndef NDEBUG
1678 for (const auto &DepKV : LocalDeps) {
1679 assert(DepKV.first != D && "Inst occurs in data structures");
1680 assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1681 }
1682
1683 for (const auto &DepKV : NonLocalPointerDeps) {
1684 assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1685 for (const auto &Entry : DepKV.second.NonLocalDeps)
1686 assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1687 }
1688
1689 for (const auto &DepKV : NonLocalDepsMap) {
1690 assert(DepKV.first != D && "Inst occurs in data structures");
1691 const PerInstNLInfo &INLD = DepKV.second;
1692 for (const auto &Entry : INLD.first)
1693 assert(Entry.getResult().getInst() != D &&
1694 "Inst occurs in data structures");
1695 }
1696
1697 for (const auto &DepKV : ReverseLocalDeps) {
1698 assert(DepKV.first != D && "Inst occurs in data structures");
1699 for (Instruction *Inst : DepKV.second)
1700 assert(Inst != D && "Inst occurs in data structures");
1701 }
1702
1703 for (const auto &DepKV : ReverseNonLocalDeps) {
1704 assert(DepKV.first != D && "Inst occurs in data structures");
1705 for (Instruction *Inst : DepKV.second)
1706 assert(Inst != D && "Inst occurs in data structures");
1707 }
1708
1709 for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1710 assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1711
1712 for (ValueIsLoadPair P : DepKV.second)
1713 assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1714 "Inst occurs in ReverseNonLocalPtrDeps map");
1715 }
1716#endif
1717}
1718
1719AnalysisKey MemoryDependenceAnalysis::Key;
1720
1722 : DefaultBlockScanLimit(BlockScanLimit) {}
1723
1726 auto &AA = AM.getResult<AAManager>(F);
1727 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1728 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1729 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1730 return MemoryDependenceResults(AA, AC, TLI, DT, DefaultBlockScanLimit);
1731}
1732
1734
1736 "Memory Dependence Analysis", false, true)
1743
1746}
1747
1749
1751 MemDep.reset();
1752}
1753
1755 AU.setPreservesAll();
1760}
1761
1764 // Check whether our analysis is preserved.
1765 auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1766 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1767 // If not, give up now.
1768 return true;
1769
1770 // Check whether the analyses we depend on became invalid for any reason.
1771 if (Inv.invalidate<AAManager>(F, PA) ||
1772 Inv.invalidate<AssumptionAnalysis>(F, PA) ||
1774 return true;
1775
1776 // Otherwise this analysis result remains valid.
1777 return false;
1778}
1779
1781 return DefaultBlockScanLimit;
1782}
1783
1785 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1786 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1787 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1788 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1789 MemDep.emplace(AA, AC, TLI, DT, BlockScanLimit);
1790 return false;
1791}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isLoad(int Opcode)
Atomic ordering constants.
basic Basic Alias true
block Block Frequency Analysis
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
Hexagon Common GEP
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static const unsigned int NumResultsLimit
static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, const TargetLibraryInfo &TLI)
If the given instruction references a specific memory location, fill in Loc with the details,...
static cl::opt< unsigned > BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(200), cl::desc("The number of blocks to scan during memory " "dependency analysis (default = 200)"))
static void RemoveFromReverseMap(DenseMap< Instruction *, SmallPtrSet< KeyTy, 4 > > &ReverseMap, Instruction *Inst, KeyTy Val)
This is a helper function that removes Val from 'Inst's set in ReverseMap.
static void SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache, unsigned NumSortedEntries)
Sort the NonLocalDepInfo cache, given a certain number of elements in the array that are already prop...
static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache, int Count=-1)
This method is used when -debug is specified to verify that cache arrays are properly kept sorted.
static cl::opt< unsigned > BlockScanLimit("memdep-block-scan-limit", cl::Hidden, cl::init(100), cl::desc("The number of instructions to scan in a block in memory " "dependency analysis (default = 100)"))
This file provides utility analysis objects describing memory locations.
static bool isOrdered(const Instruction *I)
Definition: MemorySSA.cpp:1712
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
print Instructions which execute on loop entry
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
@ SI
@ VI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This defines the Use class.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
bool onlyReadsMemory(const CallBase *Call)
Checks if the specified call is known to only read from non-volatile memory (or not access memory at ...
The possible results of an alias query.
Definition: AliasAnalysis.h:83
@ NoAlias
The two locations do not alias at all.
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
Definition: PassManager.h:90
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:661
bool invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Trigger the invalidation of some other analysis pass if not already handled and return whether it was...
Definition: PassManager.h:679
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
AnalysisUsage & addRequiredTransitive()
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:316
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:314
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:146
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, bool IgnoreLocals=false)
ModRefInfo callCapturesBefore(const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1186
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:315
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition: DenseMap.h:71
bool empty() const
Definition: DenseMap.h:98
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Dependence - This class represents a dependence between two memory memory references in a function.
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:314
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
An instruction for ordering other memory operations.
Definition: Instructions.h:436
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
const BasicBlock & getEntryBlock() const
Definition: Function.h:740
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
Definition: Instruction.h:257
const BasicBlock * getParent() const
Definition: Instruction.h:90
bool isTerminator() const
Definition: Instruction.h:171
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
bool isVolatile() const LLVM_READONLY
Return true if this instruction has a volatile memory access.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
An instruction for reading from memory.
Definition: Instructions.h:177
Value * getPointerOperand()
Definition: Instructions.h:264
bool hasValue() const
uint64_t getValue() const
bool isPrecise() const
A memory dependence query can return one of three different answers.
bool isNonLocal() const
Tests if this MemDepResult represents a query that is transparent to the start of the block,...
static MemDepResult getNonLocal()
bool isNonFuncLocal() const
Tests if this MemDepResult represents a query that is transparent to the start of the function.
static MemDepResult getClobber(Instruction *Inst)
bool isDef() const
Tests if this MemDepResult represents a query that is an instruction definition dependency.
static MemDepResult getUnknown()
bool isLocal() const
Tests if this MemDepResult represents a valid local query (Clobber/Def).
bool isUnknown() const
Tests if this MemDepResult represents a query which cannot and/or will not be computed.
static MemDepResult getNonFuncLocal()
static MemDepResult getDef(Instruction *Inst)
get methods: These are static ctor methods for creating various MemDepResult kinds.
Instruction * getInst() const
If this is a normal dependency, returns the instruction that is depended on.
An analysis that produces MemoryDependenceResults for a function.
MemoryDependenceResults run(Function &F, FunctionAnalysisManager &AM)
Provides a lazy, caching interface for making common memory aliasing information queries,...
MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst, unsigned *Limit, BatchAAResults &BatchAA)
std::vector< NonLocalDepEntry > NonLocalDepInfo
void invalidateCachedPredecessors()
Clears the PredIteratorCache info.
void invalidateCachedPointerInfo(Value *Ptr)
Invalidates cached information about the specified pointer, because it may be too conservative in mem...
MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst=nullptr, unsigned *Limit=nullptr)
Returns the instruction on which a memory location depends.
void removeInstruction(Instruction *InstToRemove)
Removes an instruction from the dependence analysis, updating the dependence of instructions that pre...
MemDepResult getInvariantGroupPointerDependency(LoadInst *LI, BasicBlock *BB)
This analysis looks for other loads and stores with invariant.group metadata and the same pointer ope...
unsigned getDefaultBlockScanLimit() const
Some methods limit the number of instructions they will examine.
MemDepResult getDependency(Instruction *QueryInst)
Returns the instruction on which a memory operation depends.
const NonLocalDepInfo & getNonLocalCallDependency(CallBase *QueryCall)
Perform a full dependency query for the specified call, returning the set of blocks that the value is...
void getNonLocalPointerDependency(Instruction *QueryInst, SmallVectorImpl< NonLocalDepResult > &Result)
Perform a full dependency query for an access to the QueryInst's specified memory location,...
bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)
Handle invalidation in the new PM.
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDepnedenceResults instance.
bool runOnFunction(Function &) override
Pass Implementation stuff. This doesn't do any analysis eagerly.
void getAnalysisUsage(AnalysisUsage &AU) const override
Does not modify anything. It uses Value Numbering and Alias Analysis.
void releaseMemory() override
Clean up memory in between runs.
Representation for a specific memory location.
MemoryLocation getWithNewSize(LocationSize NewSize) const
MemoryLocation getWithoutAATags() const
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
MemoryLocation getWithNewPtr(const Value *NewPtr) const
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
const Value * Ptr
The address of the start of the location.
static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
This is an entry in the NonLocalDepInfo cache.
void setResult(const MemDepResult &R)
const MemDepResult & getResult() const
This is a result from a NonLocal dependence query.
PHITransAddr - An address value which tracks and handles phi translation.
Definition: PHITransAddr.h:35
Value * translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
translateValue - PHI translate the current address up the CFG from CurBB to Pred, updating our state ...
Value * getAddr() const
Definition: PHITransAddr.h:58
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void clear()
clear - Remove all information.
ArrayRef< BasicBlock * > get(BasicBlock *BB)
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition: PassManager.h:310
size_type size() const
Definition: SmallPtrSet.h:93
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Target - Wrapper for Target specific information.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:258
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
This class represents the va_arg llvm instruction, which returns an argument of the specified type gi...
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:685
self_iterator getIterator()
Definition: ilist_node.h:82
This class provides various memory handling functions that manipulate MemoryBlock instances.
Definition: Memory.h:52
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void initializeMemoryDependenceWrapperPassPass(PassRegistry &)
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:2129
bool isStrongerThanUnordered(AtomicOrdering AO)
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
auto upper_bound(R &&Range, T &&Value)
Provide wrappers to std::upper_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2051
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:511
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1744
bool isModOrRefSet(const ModRefInfo MRI)
Definition: ModRef.h:42
AtomicOrdering
Atomic ordering for LLVM's memory model.
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition: ModRef.h:27
@ Ref
The access may reference the value stored in memory.
@ Mod
The access may modify the value stored in memory.
@ NoModRef
The access neither references nor modifies the value stored in memory.
Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
bool isModAndRefSet(const ModRefInfo MRI)
Definition: ModRef.h:45
bool isNoModRef(const ModRefInfo MRI)
Definition: ModRef.h:39
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:651
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:69