//===-- Doxygen export header: LLVM 23.0.0git, MemoryDependenceAnalysis.cpp --===//
1//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements an analysis that determines, for a given memory
10// operation, what preceding memory operations it depends on. It builds on
11// alias analysis information, and tries to provide a lazy, caching interface to
12// a common kind of alias information query.
13//
14//===----------------------------------------------------------------------===//
15
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/InstrTypes.h"
33#include "llvm/IR/Instruction.h"
36#include "llvm/IR/LLVMContext.h"
37#include "llvm/IR/Metadata.h"
38#include "llvm/IR/Module.h"
40#include "llvm/IR/Type.h"
41#include "llvm/IR/Use.h"
42#include "llvm/IR/Value.h"
44#include "llvm/Pass.h"
49#include "llvm/Support/Debug.h"
50#include <algorithm>
51#include <cassert>
52#include <iterator>
53#include <utility>
54
55using namespace llvm;
56
57#define DEBUG_TYPE "memdep"
58
59STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
60STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
61STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
62
63STATISTIC(NumCacheNonLocalPtr,
64 "Number of fully cached non-local ptr responses");
65STATISTIC(NumCacheDirtyNonLocalPtr,
66 "Number of cached, but dirty, non-local ptr responses");
67STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
68STATISTIC(NumCacheCompleteNonLocalPtr,
69 "Number of block queries that were completely cached");
70
71// Limit for the number of instructions to scan in a block.
72
74 "memdep-block-scan-limit", cl::Hidden, cl::init(100),
75 cl::desc("The number of instructions to scan in a block in memory "
76 "dependency analysis (default = 100)"));
77
79 BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(200),
80 cl::desc("The number of blocks to scan during memory "
81 "dependency analysis (default = 200)"));
82
84 "memdep-cache-global-limit", cl::Hidden, cl::init(10000),
85 cl::desc("The max number of entries allowed in a cache (default = 10000)"));
86
87// Limit on the number of memdep results to process.
88static const unsigned int NumResultsLimit = 100;
89
90/// This is a helper function that removes Val from 'Inst's set in ReverseMap.
91///
92/// If the set becomes empty, remove Inst's entry.
93template <typename KeyTy>
94static void
96 Instruction *Inst, KeyTy Val) {
98 ReverseMap.find(Inst);
99 assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
100 bool Found = InstIt->second.erase(Val);
101 assert(Found && "Invalid reverse map!");
102 (void)Found;
103 if (InstIt->second.empty())
104 ReverseMap.erase(InstIt);
105}
106
107/// If the given instruction references a specific memory location, fill in Loc
108/// with the details, otherwise set Loc.Ptr to null.
109///
110/// Returns a ModRefInfo value describing the general behavior of the
111/// instruction.
113 const TargetLibraryInfo &TLI) {
114 if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
115 if (LI->isUnordered()) {
117 return ModRefInfo::Ref;
118 }
119 if (LI->getOrdering() == AtomicOrdering::Monotonic) {
121 return ModRefInfo::ModRef;
122 }
124 return ModRefInfo::ModRef;
125 }
126
127 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
128 if (SI->isUnordered()) {
130 return ModRefInfo::Mod;
131 }
132 if (SI->getOrdering() == AtomicOrdering::Monotonic) {
134 return ModRefInfo::ModRef;
135 }
137 return ModRefInfo::ModRef;
138 }
139
140 if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
142 return ModRefInfo::ModRef;
143 }
144
145 if (const CallBase *CB = dyn_cast<CallBase>(Inst)) {
146 if (Value *FreedOp = getFreedOperand(CB, &TLI)) {
147 // calls to free() deallocate the entire structure
148 Loc = MemoryLocation::getAfter(FreedOp);
149 return ModRefInfo::Mod;
150 }
151 }
152
153 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
154 switch (II->getIntrinsicID()) {
155 case Intrinsic::lifetime_start:
156 case Intrinsic::lifetime_end:
158 // These intrinsics don't really modify the memory, but returning Mod
159 // will allow them to be handled conservatively.
160 return ModRefInfo::Mod;
161 case Intrinsic::invariant_start:
163 // These intrinsics don't really modify the memory, but returning Mod
164 // will allow them to be handled conservatively.
165 return ModRefInfo::Mod;
166 case Intrinsic::invariant_end:
168 // These intrinsics don't really modify the memory, but returning Mod
169 // will allow them to be handled conservatively.
170 return ModRefInfo::Mod;
171 case Intrinsic::masked_load:
173 return ModRefInfo::Ref;
174 case Intrinsic::masked_store:
176 return ModRefInfo::Mod;
177 default:
178 break;
179 }
180 }
181
182 // Otherwise, just do the coarse-grained thing that always works.
183 if (Inst->mayWriteToMemory())
184 return ModRefInfo::ModRef;
185 if (Inst->mayReadFromMemory())
186 return ModRefInfo::Ref;
188}
189
190/// Private helper for finding the local dependencies of a call site.
191MemDepResult MemoryDependenceResults::getCallDependencyFrom(
192 CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
193 BasicBlock *BB) {
194 unsigned Limit = getDefaultBlockScanLimit();
195
196 // Walk backwards through the block, looking for dependencies.
197 while (ScanIt != BB->begin()) {
198 Instruction *Inst = &*--ScanIt;
199
200 // Limit the amount of scanning we do so we don't end up with quadratic
201 // running time on extreme testcases.
202 --Limit;
203 if (!Limit)
205
206 // If this inst is a memory op, get the pointer it accessed
207 MemoryLocation Loc;
208 ModRefInfo MR = GetLocation(Inst, Loc, TLI);
209 if (Loc.Ptr) {
210 // A simple instruction.
211 if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
212 return MemDepResult::getClobber(Inst);
213 continue;
214 }
215
216 if (auto *CallB = dyn_cast<CallBase>(Inst)) {
217 // If these two calls do not interfere, look past it.
218 if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
219 // If the two calls are the same, return Inst as a Def, so that
220 // Call can be found redundant and eliminated.
221 if (isReadOnlyCall && !isModSet(MR) &&
223 return MemDepResult::getDef(Inst);
224
225 // Otherwise if the two calls don't interact (e.g. CallB is readnone)
226 // keep scanning.
227 continue;
228 } else
229 return MemDepResult::getClobber(Inst);
230 }
231
232 // If we could not obtain a pointer for the instruction and the instruction
233 // touches memory then assume that this is a dependency.
234 if (isModOrRefSet(MR))
235 return MemDepResult::getClobber(Inst);
236 }
237
238 // No dependence found. If this is the entry block of the function, it is
239 // unknown, otherwise it is non-local.
240 if (BB != &BB->getParent()->getEntryBlock())
243}
244
246 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
247 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
248 BatchAAResults &BatchAA) {
249 MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
250 if (QueryInst != nullptr) {
251 if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
252 InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
253
254 if (InvariantGroupDependency.isDef())
255 return InvariantGroupDependency;
256 }
257 }
259 MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, BatchAA);
260 if (SimpleDep.isDef())
261 return SimpleDep;
262 // Non-local invariant group dependency indicates there is non local Def
263 // (it only returns nonLocal if it finds nonLocal def), which is better than
264 // local clobber and everything else.
265 if (InvariantGroupDependency.isNonLocal())
266 return InvariantGroupDependency;
267
268 assert(InvariantGroupDependency.isUnknown() &&
269 "InvariantGroupDependency should be only unknown at this point");
270 return SimpleDep;
271}
272
274 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
275 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
276 BatchAAResults BatchAA(AA, &EEA);
277 return getPointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, Limit,
278 BatchAA);
279}
280
283 BasicBlock *BB) {
284
285 if (!LI->hasMetadata(LLVMContext::MD_invariant_group))
287
288 // Take the ptr operand after all casts and geps 0. This way we can search
289 // cast graph down only.
290 Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
291
292 // It's is not safe to walk the use list of global value, because function
293 // passes aren't allowed to look outside their functions.
294 // FIXME: this could be fixed by filtering instructions from outside
295 // of current function.
296 if (isa<GlobalValue>(LoadOperand))
298
299 Instruction *ClosestDependency = nullptr;
300 // Order of instructions in uses list is unpredictible. In order to always
301 // get the same result, we will look for the closest dominance.
302 auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
303 assert(Other && "Must call it with not null instruction");
304 if (Best == nullptr || DT.dominates(Best, Other))
305 return Other;
306 return Best;
307 };
308
309 for (const Use &Us : LoadOperand->uses()) {
310 auto *U = dyn_cast<Instruction>(Us.getUser());
311 if (!U || U == LI || !DT.dominates(U, LI))
312 continue;
313
314 // If we hit load/store with the same invariant.group metadata (and the
315 // same pointer operand) we can assume that value pointed by pointer
316 // operand didn't change.
317 if ((isa<LoadInst>(U) ||
318 (isa<StoreInst>(U) &&
319 cast<StoreInst>(U)->getPointerOperand() == LoadOperand)) &&
320 U->hasMetadata(LLVMContext::MD_invariant_group))
321 ClosestDependency = GetClosestDependency(ClosestDependency, U);
322 }
323
324 if (!ClosestDependency)
326 if (ClosestDependency->getParent() == BB)
327 return MemDepResult::getDef(ClosestDependency);
328 // Def(U) can't be returned here because it is non-local. If local
329 // dependency won't be found then return nonLocal counting that the
330 // user will call getNonLocalPointerDependency, which will return cached
331 // result.
332 NonLocalDefsCache.try_emplace(
333 LI, NonLocalDepResult(ClosestDependency->getParent(),
334 MemDepResult::getDef(ClosestDependency), nullptr));
335 ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
337}
338
339// Check if SI that may alias with MemLoc can be safely skipped. This is
340// possible in case if SI can only must alias or no alias with MemLoc (no
341// partial overlapping possible) and it writes the same value that MemLoc
342// contains now (it was loaded before this store and was not modified in
343// between).
345 const MemoryLocation &MemLoc,
346 Align MemLocAlign, BatchAAResults &BatchAA,
347 unsigned ScanLimit) {
348 if (!MemLoc.Size.hasValue())
349 return false;
350 if (MemoryLocation::get(SI).Size != MemLoc.Size)
351 return false;
352 if (MemLoc.Size.isScalable())
353 return false;
354 if (std::min(MemLocAlign, SI->getAlign()).value() <
355 MemLoc.Size.getValue().getKnownMinValue())
356 return false;
357
358 auto *LI = dyn_cast<LoadInst>(SI->getValueOperand());
359 if (!LI || LI->getParent() != SI->getParent())
360 return false;
361 if (BatchAA.alias(MemoryLocation::get(LI), MemLoc) != AliasResult::MustAlias)
362 return false;
363 unsigned NumVisitedInsts = 0;
364 for (const Instruction *I = LI; I != SI; I = I->getNextNode())
365 if (++NumVisitedInsts > ScanLimit ||
366 isModSet(BatchAA.getModRefInfo(I, MemLoc)))
367 return false;
368
369 return true;
370}
371
373 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
374 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
375 BatchAAResults &BatchAA) {
376 bool isInvariantLoad = false;
377 Align MemLocAlign =
379
380 unsigned DefaultLimit = getDefaultBlockScanLimit();
381 if (!Limit)
382 Limit = &DefaultLimit;
383
384 // We must be careful with atomic accesses, as they may allow another thread
385 // to touch this location, clobbering it. We are conservative: if the
386 // QueryInst is not a simple (non-atomic) memory access, we automatically
387 // return getClobber.
388 // If it is simple, we know based on the results of
389 // "Compiler testing via a theory of sound optimisations in the C11/C++11
390 // memory model" in PLDI 2013, that a non-atomic location can only be
391 // clobbered between a pair of a release and an acquire action, with no
392 // access to the location in between.
393 // Here is an example for giving the general intuition behind this rule.
394 // In the following code:
395 // store x 0;
396 // release action; [1]
397 // acquire action; [4]
398 // %val = load x;
399 // It is unsafe to replace %val by 0 because another thread may be running:
400 // acquire action; [2]
401 // store x 42;
402 // release action; [3]
403 // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
404 // being 42. A key property of this program however is that if either
405 // 1 or 4 were missing, there would be a race between the store of 42
406 // either the store of 0 or the load (making the whole program racy).
407 // The paper mentioned above shows that the same property is respected
408 // by every program that can detect any optimization of that kind: either
409 // it is racy (undefined) or there is a release followed by an acquire
410 // between the pair of accesses under consideration.
411
412 // If the load is invariant, we "know" that it doesn't alias *any* write. We
413 // do want to respect mustalias results since defs are useful for value
414 // forwarding, but any mayalias write can be assumed to be noalias.
415 // Arguably, this logic should be pushed inside AliasAnalysis itself.
416 if (isLoad && QueryInst)
417 if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
418 if (LI->hasMetadata(LLVMContext::MD_invariant_load))
419 isInvariantLoad = true;
420 MemLocAlign = LI->getAlign();
421 }
422
423 // True for volatile instruction.
424 // For Load/Store return true if atomic ordering is stronger than AO,
425 // for other instruction just true if it can read or write to memory.
426 auto isComplexForReordering = [](Instruction * I, AtomicOrdering AO)->bool {
427 if (I->isVolatile())
428 return true;
429 if (auto *LI = dyn_cast<LoadInst>(I))
430 return isStrongerThan(LI->getOrdering(), AO);
431 if (auto *SI = dyn_cast<StoreInst>(I))
432 return isStrongerThan(SI->getOrdering(), AO);
433 return I->mayReadOrWriteMemory();
434 };
435
436 // Walk backwards through the basic block, looking for dependencies.
437 while (ScanIt != BB->begin()) {
438 Instruction *Inst = &*--ScanIt;
439
440 // Limit the amount of scanning we do so we don't end up with quadratic
441 // running time on extreme testcases.
442 --*Limit;
443 if (!*Limit)
445
447 // If we reach a lifetime begin or end marker, then the query ends here
448 // because the value is undefined.
449 Intrinsic::ID ID = II->getIntrinsicID();
450 switch (ID) {
451 case Intrinsic::lifetime_start: {
452 MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(0));
453 if (BatchAA.isMustAlias(ArgLoc, MemLoc))
454 return MemDepResult::getDef(II);
455 continue;
456 }
457 case Intrinsic::masked_load:
458 case Intrinsic::masked_store: {
460 /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
461 AliasResult R = BatchAA.alias(Loc, MemLoc);
462 if (R == AliasResult::NoAlias)
463 continue;
464 if (R == AliasResult::MustAlias)
465 return MemDepResult::getDef(II);
466 if (ID == Intrinsic::masked_load)
467 continue;
469 }
470 }
471 }
472
473 // Values depend on loads if the pointers are must aliased. This means
474 // that a load depends on another must aliased load from the same value.
475 // One exception is atomic loads: a value can depend on an atomic load that
476 // it does not alias with when this atomic load indicates that another
477 // thread may be accessing the location.
478 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
479 // While volatile access cannot be eliminated, they do not have to clobber
480 // non-aliasing locations, as normal accesses, for example, can be safely
481 // reordered with volatile accesses.
482 if (LI->isVolatile()) {
483 if (!QueryInst)
484 // Original QueryInst *may* be volatile
485 return MemDepResult::getClobber(LI);
486 if (QueryInst->isVolatile())
487 // Ordering required if QueryInst is itself volatile
488 return MemDepResult::getClobber(LI);
489 // Otherwise, volatile doesn't imply any special ordering
490 }
491
492 // Atomic loads have complications involved.
493 // A Monotonic (or higher) load is OK if the query inst is itself not
494 // atomic.
495 // FIXME: This is overly conservative.
496 if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
497 if (!QueryInst ||
498 isComplexForReordering(QueryInst, AtomicOrdering::NotAtomic))
499 return MemDepResult::getClobber(LI);
500 if (LI->getOrdering() != AtomicOrdering::Monotonic)
501 return MemDepResult::getClobber(LI);
502 }
503
505
506 // If we found a pointer, check if it could be the same as our pointer.
507 AliasResult R = BatchAA.alias(LoadLoc, MemLoc);
508
509 if (R == AliasResult::NoAlias)
510 continue;
511
512 if (isLoad) {
513 // Must aliased loads are defs of each other.
514 if (R == AliasResult::MustAlias)
515 return MemDepResult::getDef(Inst);
516
517 // If we have a partial alias, then return this as a clobber for the
518 // client to handle.
519 if (R == AliasResult::PartialAlias && R.hasOffset()) {
520 ClobberOffsets[LI] = R.getOffset();
521 return MemDepResult::getClobber(Inst);
522 }
523
524 // Random may-alias loads don't depend on each other without a
525 // dependence.
526 continue;
527 }
528
529 // Stores don't alias loads from read-only memory.
530 if (!isModSet(BatchAA.getModRefInfoMask(LoadLoc)))
531 continue;
532
533 // Stores depend on may/must aliased loads.
534 return MemDepResult::getDef(Inst);
535 }
536
537 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
538 // Atomic stores have complications involved.
539 // A Monotonic store is OK if the query inst is itself not atomic.
540 // FIXME: This is overly conservative.
541 if (!SI->isUnordered() && SI->isAtomic()) {
542 if (!QueryInst ||
543 isComplexForReordering(QueryInst, AtomicOrdering::Unordered))
545 // Ok, if we are here the guard above guarantee us that
546 // QueryInst is a non-atomic or unordered load/store.
547 // SI is atomic with monotonic or release semantic (seq_cst for store
548 // is actually a release semantic plus total order over other seq_cst
549 // instructions, as soon as QueryInst is not seq_cst we can consider it
550 // as simple release semantic).
551 // Monotonic and Release semantic allows re-ordering before store
552 // so we are safe to go further and check the aliasing. It will prohibit
553 // re-ordering in case locations are may or must alias.
554 }
555
556 // While volatile access cannot be eliminated, they do not have to clobber
557 // non-aliasing locations, as normal accesses can for example be reordered
558 // with volatile accesses.
559 if (SI->isVolatile())
560 if (!QueryInst || QueryInst->isVolatile())
562
563 // If alias analysis can tell that this store is guaranteed to not modify
564 // the query pointer, ignore it. Use getModRefInfo to handle cases where
565 // the query pointer points to constant memory etc.
566 if (!isModOrRefSet(BatchAA.getModRefInfo(SI, MemLoc)))
567 continue;
568
569 // Ok, this store might clobber the query pointer. Check to see if it is
570 // a must alias: in this case, we want to return this as a def.
571 // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
573
574 // If we found a pointer, check if it could be the same as our pointer.
575 AliasResult R = BatchAA.alias(StoreLoc, MemLoc);
576
577 if (R == AliasResult::NoAlias)
578 continue;
579 if (R == AliasResult::MustAlias)
580 return MemDepResult::getDef(Inst);
581 if (isInvariantLoad)
582 continue;
583 if (canSkipClobberingStore(SI, MemLoc, MemLocAlign, BatchAA, *Limit))
584 continue;
585 return MemDepResult::getClobber(Inst);
586 }
587
588 // If this is an allocation, and if we know that the accessed pointer is to
589 // the allocation, return Def. This means that there is no dependence and
590 // the access can be optimized based on that. For example, a load could
591 // turn into undef. Note that we can bypass the allocation itself when
592 // looking for a clobber in many cases; that's an alias property and is
593 // handled by BasicAA.
594 if (isa<AllocaInst>(Inst) || isNoAliasCall(Inst)) {
595 const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr);
596 if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr))
597 return MemDepResult::getDef(Inst);
598 }
599
600 // If we found a select instruction for MemLoc pointer, return it as Def
601 // dependency.
602 if (isa<SelectInst>(Inst) && MemLoc.Ptr == Inst)
603 return MemDepResult::getDef(Inst);
604
605 if (isInvariantLoad)
606 continue;
607
608 // A release fence requires that all stores complete before it, but does
609 // not prevent the reordering of following loads or stores 'before' the
610 // fence. As a result, we look past it when finding a dependency for
611 // loads. DSE uses this to find preceding stores to delete and thus we
612 // can't bypass the fence if the query instruction is a store.
613 if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
614 if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
615 continue;
616
617 // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
618 switch (BatchAA.getModRefInfo(Inst, MemLoc)) {
620 // If the call has no effect on the queried pointer, just ignore it.
621 continue;
622 case ModRefInfo::Mod:
623 return MemDepResult::getClobber(Inst);
624 case ModRefInfo::Ref:
625 // If the call is known to never store to the pointer, and if this is a
626 // load query, we can safely ignore it (scan past it).
627 if (isLoad)
628 continue;
629 [[fallthrough]];
630 default:
631 // Otherwise, there is a potential dependence. Return a clobber.
632 return MemDepResult::getClobber(Inst);
633 }
634 }
635
636 // No dependence found. If this is the entry block of the function, it is
637 // unknown, otherwise it is non-local.
638 if (BB != &BB->getParent()->getEntryBlock())
641}
642
644 ClobberOffsets.clear();
645 Instruction *ScanPos = QueryInst;
646
647 // Check for a cached result
648 MemDepResult &LocalCache = LocalDeps[QueryInst];
649
650 // If the cached entry is non-dirty, just return it. Note that this depends
651 // on MemDepResult's default constructing to 'dirty'.
652 if (!LocalCache.isDirty())
653 return LocalCache;
654
655 // Otherwise, if we have a dirty entry, we know we can start the scan at that
656 // instruction, which may save us some work.
657 if (Instruction *Inst = LocalCache.getInst()) {
658 ScanPos = Inst;
659
660 RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
661 }
662
663 BasicBlock *QueryParent = QueryInst->getParent();
664
665 // Do the scan.
666 if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
667 // No dependence found. If this is the entry block of the function, it is
668 // unknown, otherwise it is non-local.
669 if (QueryParent != &QueryParent->getParent()->getEntryBlock())
670 LocalCache = MemDepResult::getNonLocal();
671 else
672 LocalCache = MemDepResult::getNonFuncLocal();
673 } else {
674 MemoryLocation MemLoc;
675 ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
676 if (MemLoc.Ptr) {
677 // If we can do a pointer scan, make it happen.
678 bool isLoad = !isModSet(MR);
679 if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
680 isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
681
682 LocalCache =
683 getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
684 QueryParent, QueryInst, nullptr);
685 } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
686 bool isReadOnly = AA.onlyReadsMemory(QueryCall);
687 LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
688 ScanPos->getIterator(), QueryParent);
689 } else
690 // Non-memory instruction.
691 LocalCache = MemDepResult::getUnknown();
692 }
693
694 // Remember the result!
695 if (Instruction *I = LocalCache.getInst())
696 ReverseLocalDeps[I].insert(QueryInst);
697
698 return LocalCache;
699}
700
701#ifndef NDEBUG
702/// This method is used when -debug is specified to verify that cache arrays
703/// are properly kept sorted.
705 int Count = -1) {
706 if (Count == -1)
707 Count = Cache.size();
708 assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
709 "Cache isn't sorted!");
710}
711#endif
712
715 assert(getDependency(QueryCall).isNonLocal() &&
716 "getNonLocalCallDependency should only be used on calls with "
717 "non-local deps!");
718 PerInstNLInfo &CacheP = NonLocalDepsMap[QueryCall];
719 NonLocalDepInfo &Cache = CacheP.first;
720
721 // This is the set of blocks that need to be recomputed. In the cached case,
722 // this can happen due to instructions being deleted etc. In the uncached
723 // case, this starts out as the set of predecessors we care about.
725
726 if (!Cache.empty()) {
727 // Okay, we have a cache entry. If we know it is not dirty, just return it
728 // with no computation.
729 if (!CacheP.second) {
730 ++NumCacheNonLocal;
731 return Cache;
732 }
733
734 // If we already have a partially computed set of results, scan them to
735 // determine what is dirty, seeding our initial DirtyBlocks worklist.
736 for (auto &Entry : Cache)
737 if (Entry.getResult().isDirty())
738 DirtyBlocks.push_back(Entry.getBB());
739
740 // Sort the cache so that we can do fast binary search lookups below.
741 llvm::sort(Cache);
742
743 ++NumCacheDirtyNonLocal;
744 } else {
745 // Seed DirtyBlocks with each of the preds of QueryInst's block.
746 BasicBlock *QueryBB = QueryCall->getParent();
747 append_range(DirtyBlocks, PredCache.get(QueryBB));
748 ++NumUncacheNonLocal;
749 }
750
751 // isReadonlyCall - If this is a read-only call, we can be more aggressive.
752 bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);
753
755
756 unsigned NumSortedEntries = Cache.size();
757 LLVM_DEBUG(AssertSorted(Cache));
758
759 // Iterate while we still have blocks to update.
760 while (!DirtyBlocks.empty()) {
761 BasicBlock *DirtyBB = DirtyBlocks.pop_back_val();
762
763 // Already processed this block?
764 if (!Visited.insert(DirtyBB).second)
765 continue;
766
767 // Do a binary search to see if we already have an entry for this block in
768 // the cache set. If so, find it.
769 LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
770 NonLocalDepInfo::iterator Entry =
771 std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
772 NonLocalDepEntry(DirtyBB));
773 if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
774 --Entry;
775
776 NonLocalDepEntry *ExistingResult = nullptr;
777 if (Entry != Cache.begin() + NumSortedEntries &&
778 Entry->getBB() == DirtyBB) {
779 // If we already have an entry, and if it isn't already dirty, the block
780 // is done.
781 if (!Entry->getResult().isDirty())
782 continue;
783
784 // Otherwise, remember this slot so we can update the value.
785 ExistingResult = &*Entry;
786 }
787
788 // If the dirty entry has a pointer, start scanning from it so we don't have
789 // to rescan the entire block.
790 BasicBlock::iterator ScanPos = DirtyBB->end();
791 if (ExistingResult) {
792 if (Instruction *Inst = ExistingResult->getResult().getInst()) {
793 ScanPos = Inst->getIterator();
794 // We're removing QueryInst's use of Inst.
795 RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
796 QueryCall);
797 }
798 }
799
800 // Find out if this block has a local dependency for QueryInst.
801 MemDepResult Dep;
802
803 if (ScanPos != DirtyBB->begin()) {
804 Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
805 } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
806 // No dependence found. If this is the entry block of the function, it is
807 // a clobber, otherwise it is unknown.
809 } else {
811 }
812
813 // If we had a dirty entry for the block, update it. Otherwise, just add
814 // a new entry.
815 if (ExistingResult)
816 ExistingResult->setResult(Dep);
817 else
818 Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
819
820 // If the block has a dependency (i.e. it isn't completely transparent to
821 // the value), remember the association!
822 if (!Dep.isNonLocal()) {
823 // Keep the ReverseNonLocalDeps map up to date so we can efficiently
824 // update this when we remove instructions.
825 if (Instruction *Inst = Dep.getInst())
826 ReverseNonLocalDeps[Inst].insert(QueryCall);
827 } else {
828
829 // If the block *is* completely transparent to the load, we need to check
830 // the predecessors of this block. Add them to our worklist.
831 append_range(DirtyBlocks, PredCache.get(DirtyBB));
832 }
833 }
834
835 return Cache;
836}
837
840 const MemoryLocation Loc = MemoryLocation::get(QueryInst);
841 bool isLoad = isa<LoadInst>(QueryInst);
842 BasicBlock *FromBB = QueryInst->getParent();
843 assert(FromBB);
844
845 assert(Loc.Ptr->getType()->isPointerTy() &&
846 "Can't get pointer deps of a non-pointer!");
847 Result.clear();
848 {
849 // Check if there is cached Def with invariant.group.
850 auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
851 if (NonLocalDefIt != NonLocalDefsCache.end()) {
852 Result.push_back(NonLocalDefIt->second);
853 ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
854 .erase(QueryInst);
855 NonLocalDefsCache.erase(NonLocalDefIt);
856 return;
857 }
858 }
859 // This routine does not expect to deal with volatile instructions.
860 // Doing so would require piping through the QueryInst all the way through.
861 // TODO: volatiles can't be elided, but they can be reordered with other
862 // non-volatile accesses.
863
864 // We currently give up on any instruction which is ordered, but we do handle
865 // atomic instructions which are unordered.
866 // TODO: Handle ordered instructions
867 auto isOrdered = [](Instruction *Inst) {
868 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
869 return !LI->isUnordered();
870 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
871 return !SI->isUnordered();
872 }
873 return false;
874 };
875 if (QueryInst->isVolatile() || isOrdered(QueryInst)) {
876 Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
877 const_cast<Value *>(Loc.Ptr)));
878 return;
879 }
880 const DataLayout &DL = FromBB->getDataLayout();
881 PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
882
883 // NonLocalPointerDepVisited is the set of blocks we've inspected, and the
884 // pointer we consider in each block. Because of critical edges, we currently
885 // bail out if querying a block with multiple different pointers. This can
886 // happen during PHI translation.
887 ++NonLocalPointerDepEpoch;
888 assert(NonLocalPointerDepEpoch > 0 &&
889 "NonLocalPointerDepVisitedEpoch overflow");
890 NonLocalPointerDepVisited.resize(FromBB->getParent()->getMaxBlockNumber());
891 if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
892 Result, true))
893 return;
894 Result.clear();
895 Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
896 const_cast<Value *>(Loc.Ptr)));
897}
898
899/// Compute the memdep value for BB with Pointer/PointeeSize using either
900/// cached information in Cache or by doing a lookup (which may use dirty cache
901/// info if available).
902///
903/// If we do a lookup, add the result to the cache.
904MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
905 Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
906 BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries,
907 BatchAAResults &BatchAA) {
908
909 bool isInvariantLoad = false;
910
911 if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
912 isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
913
914 // Do a binary search to see if we already have an entry for this block in
915 // the cache set. If so, find it.
916 NonLocalDepInfo::iterator Entry = std::upper_bound(
917 Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
918 if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
919 --Entry;
920
921 NonLocalDepEntry *ExistingResult = nullptr;
922 if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
923 ExistingResult = &*Entry;
924
925 // Use cached result for invariant load only if there is no dependency for non
926 // invariant load. In this case invariant load can not have any dependency as
927 // well.
928 if (ExistingResult && isInvariantLoad &&
929 !ExistingResult->getResult().isNonFuncLocal())
930 ExistingResult = nullptr;
931
932 // If we have a cached entry, and it is non-dirty, use it as the value for
933 // this dependency.
934 if (ExistingResult && !ExistingResult->getResult().isDirty()) {
935 ++NumCacheNonLocalPtr;
936 return ExistingResult->getResult();
937 }
938
939 // Otherwise, we have to scan for the value. If we have a dirty cache
940 // entry, start scanning from its position, otherwise we scan from the end
941 // of the block.
942 BasicBlock::iterator ScanPos = BB->end();
943 if (ExistingResult && ExistingResult->getResult().getInst()) {
944 assert(ExistingResult->getResult().getInst()->getParent() == BB &&
945 "Instruction invalidated?");
946 ++NumCacheDirtyNonLocalPtr;
947 ScanPos = ExistingResult->getResult().getInst()->getIterator();
948
949 // Eliminating the dirty entry from 'Cache', so update the reverse info.
950 ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
951 RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
952 } else {
953 ++NumUncacheNonLocalPtr;
954 }
955
956 // Scan the block for the dependency.
957 MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
958 QueryInst, nullptr, BatchAA);
959
960 // Don't cache results for invariant load.
961 if (isInvariantLoad)
962 return Dep;
963
964 // If we had a dirty entry for the block, update it. Otherwise, just add
965 // a new entry.
966 if (ExistingResult)
967 ExistingResult->setResult(Dep);
968 else
969 Cache->push_back(NonLocalDepEntry(BB, Dep));
970
971 // If the block has a dependency (i.e. it isn't completely transparent to
972 // the value), remember the reverse association because we just added it
973 // to Cache!
974 if (!Dep.isLocal())
975 return Dep;
976
977 // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
978 // update MemDep when we remove instructions.
979 Instruction *Inst = Dep.getInst();
980 assert(Inst && "Didn't depend on anything?");
981 ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
982 ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
983 return Dep;
984}
985
986/// Sort the NonLocalDepInfo cache, given a certain number of elements in the
987/// array that are already properly ordered.
988///
989/// This is optimized for the case when only a few entries are added.
990static void
992 unsigned NumSortedEntries) {
993
994 // If only one entry, don't sort.
995 if (Cache.size() < 2)
996 return;
997
998 unsigned s = Cache.size() - NumSortedEntries;
999
1000 // If the cache is already sorted, don't sort it again.
1001 if (s == 0)
1002 return;
1003
1004 // If no entry is sorted, sort the whole cache.
1005 if (NumSortedEntries == 0) {
1006 llvm::sort(Cache);
1007 return;
1008 }
1009
1010 // If the number of unsorted entires is small and the cache size is big, using
1011 // insertion sort is faster. Here use Log2_32 to quickly choose the sort
1012 // method.
1013 if (s < Log2_32(Cache.size())) {
1014 while (s > 0) {
1015 NonLocalDepEntry Val = Cache.back();
1016 Cache.pop_back();
1017 MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1018 std::upper_bound(Cache.begin(), Cache.end() - s + 1, Val);
1019 Cache.insert(Entry, Val);
1020 s--;
1021 }
1022 } else {
1023 llvm::sort(Cache);
1024 }
1025}
1026
1027void MemoryDependenceResults::setNonLocalPointerDepVisited(BasicBlock *BB,
1028 Value *V) {
1029 NonLocalPointerDepVisited[BB->getNumber()] = {V, NonLocalPointerDepEpoch};
1030}
1031
1032bool MemoryDependenceResults::isNonLocalPointerDepVisited(
1033 BasicBlock *BB) const {
1034 return NonLocalPointerDepVisited[BB->getNumber()].second ==
1035 NonLocalPointerDepEpoch;
1036}
1037
1038Value *
1039MemoryDependenceResults::lookupNonLocalPointerDepVisited(BasicBlock *BB) const {
1040 assert(isNonLocalPointerDepVisited(BB) &&
1041 "Visited value requested for unseen block");
1042 return NonLocalPointerDepVisited[BB->getNumber()].first;
1043}
1044
1045/// Perform a dependency query based on pointer/pointeesize starting at the end
1046/// of StartBB.
1047///
1048/// Add any clobber/def results to the results vector and keep track of which
1049/// blocks are visited in 'NonLocalPointerDepVisited'.
1050///
1051/// This has special behavior for the first block queries (when SkipFirstBlock
1052/// is true). In this special case, it ignores the contents of the specified
1053/// block and starts returning dependence info for its predecessors.
1054///
1055/// This function returns true on success, or false to indicate that it could
1056/// not compute dependence information for some reason. This should be treated
1057/// as a clobber dependence on the first instruction in the predecessor block.
1058bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1059 Instruction *QueryInst, const PHITransAddr &Pointer,
1060 const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
1061 SmallVectorImpl<NonLocalDepResult> &Result, bool SkipFirstBlock,
1062 bool IsIncomplete) {
1063 // Look up the cached info for Pointer.
1064 ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1065
1066 // Set up a temporary NLPI value. If the map doesn't yet have an entry for
1067 // CacheKey, this value will be inserted as the associated value. Otherwise,
1068 // it'll be ignored, and we'll have to check to see if the cached size and
1069 // aa tags are consistent with the current query.
1070 NonLocalPointerInfo InitialNLPI;
1071 InitialNLPI.Size = Loc.Size;
1072 InitialNLPI.AATags = Loc.AATags;
1073
1074 bool isInvariantLoad = false;
1075 if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
1076 isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
1077
1078 // Get the NLPI for CacheKey, inserting one into the map if it doesn't
1079 // already have one.
1080 std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1081 NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
1082 NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1083
1084 // If we already have a cache entry for this CacheKey, we may need to do some
1085 // work to reconcile the cache entry and the current query.
1086 // Invariant loads don't participate in caching. Thus no need to reconcile.
1087 if (!isInvariantLoad && !Pair.second) {
1088 if (CacheInfo->Size != Loc.Size) {
1089 // The query's Size is not equal to the cached one. Throw out the cached
1090 // data and proceed with the query with the new size.
1091 CacheInfo->Pair = BBSkipFirstBlockPair();
1092 CacheInfo->Size = Loc.Size;
1093 for (auto &Entry : CacheInfo->NonLocalDeps)
1094 if (Instruction *Inst = Entry.getResult().getInst())
1095 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1096 CacheInfo->NonLocalDeps.clear();
1097 // The cache is cleared (in the above line) so we will have lost
1098 // information about blocks we have already visited. We therefore must
1099 // assume that the cache information is incomplete.
1100 IsIncomplete = true;
1101 }
1102
1103 // If the query's AATags are inconsistent with the cached one,
1104 // conservatively throw out the cached data and restart the query with
1105 // no tag if needed.
1106 if (CacheInfo->AATags != Loc.AATags) {
1107 if (CacheInfo->AATags) {
1108 CacheInfo->Pair = BBSkipFirstBlockPair();
1109 CacheInfo->AATags = AAMDNodes();
1110 for (auto &Entry : CacheInfo->NonLocalDeps)
1111 if (Instruction *Inst = Entry.getResult().getInst())
1112 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1113 CacheInfo->NonLocalDeps.clear();
1114 // The cache is cleared (in the above line) so we will have lost
1115 // information about blocks we have already visited. We therefore must
1116 // assume that the cache information is incomplete.
1117 IsIncomplete = true;
1118 }
1119 if (Loc.AATags)
1120 return getNonLocalPointerDepFromBB(
1121 QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
1122 SkipFirstBlock, IsIncomplete);
1123 }
1124 }
1125
1126 NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1127
1128 // If we have valid cached information for exactly the block we are
1129 // investigating, just return it with no recomputation.
1130 // Don't use cached information for invariant loads since it is valid for
1131 // non-invariant loads only.
1132 if (!IsIncomplete && !isInvariantLoad &&
1133 CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1134 // We have a fully cached result for this query then we can just return the
1135 // cached results and populate the visited set. However, we have to verify
1136 // that we don't already have conflicting results for these blocks. Check
1137 // to ensure that if a block in the results set is in the visited set that
1138 // it was for the same pointer query.
1139 for (auto &Entry : *Cache) {
1140 if (!isNonLocalPointerDepVisited(Entry.getBB()))
1141 continue;
1142 Value *Prev = lookupNonLocalPointerDepVisited(Entry.getBB());
1143 if (Prev == Pointer.getAddr())
1144 continue;
1145
1146 // We have a pointer mismatch in a block. Just return false, saying
1147 // that something was clobbered in this result. We could also do a
1148 // non-fully cached query, but there is little point in doing this.
1149 return false;
1150 }
1151
1152 Value *Addr = Pointer.getAddr();
1153 for (auto &Entry : *Cache) {
1154 setNonLocalPointerDepVisited(Entry.getBB(), Addr);
1155 if (Entry.getResult().isNonLocal()) {
1156 continue;
1157 }
1158
1159 if (DT.isReachableFromEntry(Entry.getBB())) {
1160 Result.push_back(
1161 NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
1162 }
1163 }
1164 ++NumCacheCompleteNonLocalPtr;
1165 return true;
1166 }
1167
1168 // If the size of this cache has surpassed the global limit, stop here.
1169 if (Cache->size() > CacheGlobalLimit)
1170 return false;
1171
1172 // Otherwise, either this is a new block, a block with an invalid cache
1173 // pointer or one that we're about to invalidate by putting more info into
1174 // it than its valid cache info. If empty and not explicitly indicated as
1175 // incomplete, the result will be valid cache info, otherwise it isn't.
1176 //
1177 // Invariant loads don't affect cache in any way thus no need to update
1178 // CacheInfo as well.
1179 if (!isInvariantLoad) {
1180 if (!IsIncomplete && Cache->empty())
1181 CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1182 else
1183 CacheInfo->Pair = BBSkipFirstBlockPair();
1184 }
1185
1187 Worklist.push_back(StartBB);
1188
1189 // PredList used inside loop.
1191
1192 // Keep track of the entries that we know are sorted. Previously cached
1193 // entries will all be sorted. The entries we add we only sort on demand (we
1194 // don't insert every element into its sorted position). We know that we
1195 // won't get any reuse from currently inserted values, because we don't
1196 // revisit blocks after we insert info for them.
1197 unsigned NumSortedEntries = Cache->size();
1198 unsigned WorklistEntries = BlockNumberLimit;
1199 bool GotWorklistLimit = false;
1200 LLVM_DEBUG(AssertSorted(*Cache));
1201
1202 BatchAAResults BatchAA(AA, &EEA);
1203 while (!Worklist.empty()) {
1204 BasicBlock *BB = Worklist.pop_back_val();
1205
1206 // If we do process a large number of blocks it becomes very expensive and
1207 // likely it isn't worth worrying about
1208 if (Result.size() > NumResultsLimit) {
1209 // Sort it now (if needed) so that recursive invocations of
1210 // getNonLocalPointerDepFromBB and other routines that could reuse the
1211 // cache value will only see properly sorted cache arrays.
1212 if (Cache && NumSortedEntries != Cache->size()) {
1213 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1214 }
1215 // Since we bail out, the "Cache" set won't contain all of the
1216 // results for the query. This is ok (we can still use it to accelerate
1217 // specific block queries) but we can't do the fastpath "return all
1218 // results from the set". Clear out the indicator for this.
1219 CacheInfo->Pair = BBSkipFirstBlockPair();
1220 return false;
1221 }
1222
1223 // Skip the first block if we have it.
1224 if (!SkipFirstBlock) {
1225 // Analyze the dependency of *Pointer in FromBB. See if we already have
1226 // been here.
1227 assert(isNonLocalPointerDepVisited(BB) &&
1228 "Should check 'visited' before adding to WL");
1229
1230 // Get the dependency info for Pointer in BB. If we have cached
1231 // information, we will use it, otherwise we compute it.
1232 LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
1233 MemDepResult Dep = getNonLocalInfoForBlock(
1234 QueryInst, Loc, isLoad, BB, Cache, NumSortedEntries, BatchAA);
1235
1236 // If we got a Def or Clobber, add this to the list of results.
1237 if (!Dep.isNonLocal()) {
1238 if (DT.isReachableFromEntry(BB)) {
1239 Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
1240 continue;
1241 }
1242 }
1243 }
1244
1245 // If 'Pointer' is an instruction defined in this block, then we need to do
1246 // phi translation to change it into a value live in the predecessor block.
1247 // If not, we just add the predecessors to the worklist and scan them with
1248 // the same Pointer.
1249 if (!Pointer.needsPHITranslationFromBlock(BB)) {
1250 SkipFirstBlock = false;
1251 SmallVector<BasicBlock *, 16> NewBlocks;
1252 for (BasicBlock *Pred : PredCache.get(BB)) {
1253 // Verify that we haven't looked at this block yet.
1254 if (!isNonLocalPointerDepVisited(Pred)) {
1255 setNonLocalPointerDepVisited(Pred, Pointer.getAddr());
1256 // First time we've looked at *PI.
1257 NewBlocks.push_back(Pred);
1258 continue;
1259 }
1260 Value *Prev = lookupNonLocalPointerDepVisited(Pred);
1261 // If we have seen this block before, but it was with a different
1262 // pointer then we have a phi translation failure and we have to treat
1263 // this as a clobber.
1264 if (Prev != Pointer.getAddr()) {
1265 // Make sure to clean up the Visited map before continuing on to
1266 // PredTranslationFailure.
1267 for (auto *NewBlock : NewBlocks)
1268 setNonLocalPointerDepVisited(NewBlock, nullptr);
1269 goto PredTranslationFailure;
1270 }
1271 }
1272 if (NewBlocks.size() > WorklistEntries) {
1273 // Make sure to clean up the Visited map before continuing on to
1274 // PredTranslationFailure.
1275 for (auto *NewBlock : NewBlocks)
1276 setNonLocalPointerDepVisited(NewBlock, nullptr);
1277 GotWorklistLimit = true;
1278 goto PredTranslationFailure;
1279 }
1280 WorklistEntries -= NewBlocks.size();
1281 Worklist.append(NewBlocks.begin(), NewBlocks.end());
1282 continue;
1283 }
1284
1285 // We do need to do phi translation, if we know ahead of time we can't phi
1286 // translate this value, don't even try.
1287 if (!Pointer.isPotentiallyPHITranslatable())
1288 goto PredTranslationFailure;
1289
1290 // We may have added values to the cache list before this PHI translation.
1291 // If so, we haven't done anything to ensure that the cache remains sorted.
1292 // Sort it now (if needed) so that recursive invocations of
1293 // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1294 // value will only see properly sorted cache arrays.
1295 if (Cache && NumSortedEntries != Cache->size()) {
1296 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1297 NumSortedEntries = Cache->size();
1298 }
1299 Cache = nullptr;
1300
1301 PredList.clear();
1302 for (BasicBlock *Pred : PredCache.get(BB)) {
1303 PredList.push_back(std::make_pair(Pred, Pointer));
1304
1305 // Get the PHI translated pointer in this predecessor. This can fail if
1306 // not translatable, in which case the getAddr() returns null.
1307 PHITransAddr &PredPointer = PredList.back().second;
1308 Value *PredPtrVal =
1309 PredPointer.translateValue(BB, Pred, &DT, /*MustDominate=*/false);
1310
1311 // Check to see if we have already visited this pred block with another
1312 // pointer. If so, we can't do this lookup. This failure can occur
1313 // with PHI translation when a critical edge exists and the PHI node in
1314 // the successor translates to a pointer value different than the
1315 // pointer the block was first analyzed with.
1316 if (!isNonLocalPointerDepVisited(Pred)) {
1317 setNonLocalPointerDepVisited(Pred, PredPtrVal);
1318 continue;
1319 }
1320 Value *PrevVal = lookupNonLocalPointerDepVisited(Pred);
1321
1322 // We found the pred; take it off the list of preds to visit.
1323 PredList.pop_back();
1324
1325 // If the predecessor was visited with PredPtr, then we already did
1326 // the analysis and can ignore it.
1327 if (PrevVal == PredPtrVal)
1328 continue;
1329
1330 // Otherwise, the block was previously analyzed with a different
1331 // pointer. We can't represent the result of this case, so we just
1332 // treat this as a phi translation failure.
1333
1334 // Make sure to clean up the Visited map before continuing on to
1335 // PredTranslationFailure.
1336 for (const auto &Pred : PredList)
1337 setNonLocalPointerDepVisited(Pred.first, nullptr);
1338
1339 goto PredTranslationFailure;
1340 }
1341
1342 // Actually process results here; this need to be a separate loop to avoid
1343 // calling getNonLocalPointerDepFromBB for blocks we don't want to return
1344 // any results for. (getNonLocalPointerDepFromBB will modify our
1345 // datastructures in ways the code after the PredTranslationFailure label
1346 // doesn't expect.)
1347 for (auto &I : PredList) {
1348 BasicBlock *Pred = I.first;
1349 PHITransAddr &PredPointer = I.second;
1350 Value *PredPtrVal = PredPointer.getAddr();
1351
1352 bool CanTranslate = true;
1353 // If PHI translation was unable to find an available pointer in this
1354 // predecessor, then we have to assume that the pointer is clobbered in
1355 // that predecessor. We can still do PRE of the load, which would insert
1356 // a computation of the pointer in this predecessor.
1357 if (!PredPtrVal)
1358 CanTranslate = false;
1359
1360 // FIXME: it is entirely possible that PHI translating will end up with
1361 // the same value. Consider PHI translating something like:
1362 // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
1363 // to recurse here, pedantically speaking.
1364
1365 // If getNonLocalPointerDepFromBB fails here, that means the cached
1366 // result conflicted with the Visited list; we have to conservatively
1367 // assume it is unknown, but this also does not block PRE of the load.
1368 if (!CanTranslate ||
1369 !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1370 Loc.getWithNewPtr(PredPtrVal), isLoad,
1371 Pred, Result)) {
1372 // Add the entry to the Result list.
1373 NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
1374 Result.push_back(Entry);
1375
1376 // Since we had a phi translation failure, the cache for CacheKey won't
1377 // include all of the entries that we need to immediately satisfy future
1378 // queries. Mark this in NonLocalPointerDeps by setting the
1379 // BBSkipFirstBlockPair pointer to null. This requires reuse of the
1380 // cached value to do more work but not miss the phi trans failure.
1381 NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1382 NLPI.Pair = BBSkipFirstBlockPair();
1383 continue;
1384 }
1385 }
1386
1387 // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1388 CacheInfo = &NonLocalPointerDeps[CacheKey];
1389 Cache = &CacheInfo->NonLocalDeps;
1390 NumSortedEntries = Cache->size();
1391
1392 // Since we did phi translation, the "Cache" set won't contain all of the
1393 // results for the query. This is ok (we can still use it to accelerate
1394 // specific block queries) but we can't do the fastpath "return all
1395 // results from the set" Clear out the indicator for this.
1396 CacheInfo->Pair = BBSkipFirstBlockPair();
1397 SkipFirstBlock = false;
1398 continue;
1399
1400 PredTranslationFailure:
1401 // The following code is "failure"; we can't produce a sane translation
1402 // for the given block. It assumes that we haven't modified any of
1403 // our datastructures while processing the current block.
1404
1405 if (!Cache) {
1406 // Refresh the CacheInfo/Cache pointer if it got invalidated.
1407 CacheInfo = &NonLocalPointerDeps[CacheKey];
1408 Cache = &CacheInfo->NonLocalDeps;
1409 NumSortedEntries = Cache->size();
1410 }
1411
1412 // Since we failed phi translation, the "Cache" set won't contain all of the
1413 // results for the query. This is ok (we can still use it to accelerate
1414 // specific block queries) but we can't do the fastpath "return all
1415 // results from the set". Clear out the indicator for this.
1416 CacheInfo->Pair = BBSkipFirstBlockPair();
1417
1418 // If *nothing* works, mark the pointer as unknown.
1419 //
1420 // If this is the magic first block, return this as a clobber of the whole
1421 // incoming value. Since we can't phi translate to one of the predecessors,
1422 // we have to bail out.
1423 if (SkipFirstBlock)
1424 return false;
1425
1426 // Results of invariant loads are not cached thus no need to update cached
1427 // information.
1428 if (!isInvariantLoad) {
1429 for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
1430 if (I.getBB() != BB)
1431 continue;
1432
1433 assert((GotWorklistLimit || I.getResult().isNonLocal() ||
1434 !DT.isReachableFromEntry(BB)) &&
1435 "Should only be here with transparent block");
1436
1437 I.setResult(MemDepResult::getUnknown());
1438
1439
1440 break;
1441 }
1442 }
1443 (void)GotWorklistLimit;
1444 // Go ahead and report unknown dependence.
1445 Result.push_back(
1446 NonLocalDepResult(BB, MemDepResult::getUnknown(), Pointer.getAddr()));
1447 }
1448
1449 // Okay, we're done now. If we added new values to the cache, re-sort it.
1450 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1451 LLVM_DEBUG(AssertSorted(*Cache));
1452 return true;
1453}
1454
1455/// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
1456void MemoryDependenceResults::removeCachedNonLocalPointerDependencies(
1457 ValueIsLoadPair P) {
1458
1459 // Most of the time this cache is empty.
1460 if (!NonLocalDefsCache.empty()) {
1461 auto it = NonLocalDefsCache.find(P.getPointer());
1462 if (it != NonLocalDefsCache.end()) {
1463 RemoveFromReverseMap(ReverseNonLocalDefsCache,
1464 it->second.getResult().getInst(), P.getPointer());
1465 NonLocalDefsCache.erase(it);
1466 }
1467
1468 if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
1469 auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
1470 if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
1471 for (const auto *entry : toRemoveIt->second)
1472 NonLocalDefsCache.erase(entry);
1473 ReverseNonLocalDefsCache.erase(toRemoveIt);
1474 }
1475 }
1476 }
1477
1478 CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
1479 if (It == NonLocalPointerDeps.end())
1480 return;
1481
1482 // Remove all of the entries in the BB->val map. This involves removing
1483 // instructions from the reverse map.
1484 NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
1485
1486 for (const NonLocalDepEntry &DE : PInfo) {
1487 Instruction *Target = DE.getResult().getInst();
1488 if (!Target)
1489 continue; // Ignore non-local dep results.
1490 assert(Target->getParent() == DE.getBB());
1491
1492 // Eliminating the dirty entry from 'Cache', so update the reverse info.
1493 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1494 }
1495
1496 // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1497 NonLocalPointerDeps.erase(It);
1498}
1499
1501 // If Ptr isn't really a pointer, just ignore it.
1502 if (!Ptr->getType()->isPointerTy())
1503 return;
1504 // Flush store info for the pointer.
1505 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1506 // Flush load info for the pointer.
1507 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1508}
1509
1511 PredCache.clear();
1512}
1513
1515 EEA.removeInstruction(RemInst);
1516
1517 // Walk through the Non-local dependencies, removing this one as the value
1518 // for any cached queries.
1519 NonLocalDepMapType::iterator NLDI = NonLocalDepsMap.find(RemInst);
1520 if (NLDI != NonLocalDepsMap.end()) {
1521 NonLocalDepInfo &BlockMap = NLDI->second.first;
1522 for (auto &Entry : BlockMap)
1523 if (Instruction *Inst = Entry.getResult().getInst())
1524 RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1525 NonLocalDepsMap.erase(NLDI);
1526 }
1527
1528 // If we have a cached local dependence query for this instruction, remove it.
1529 LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1530 if (LocalDepEntry != LocalDeps.end()) {
1531 // Remove us from DepInst's reverse set now that the local dep info is gone.
1532 if (Instruction *Inst = LocalDepEntry->second.getInst())
1533 RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1534
1535 // Remove this local dependency info.
1536 LocalDeps.erase(LocalDepEntry);
1537 }
1538
1539 // If we have any cached dependencies on this instruction, remove
1540 // them.
1541
1542 // If the instruction is a pointer, remove it from both the load info and the
1543 // store info.
1544 if (RemInst->getType()->isPointerTy()) {
1545 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1546 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1547 } else {
1548 // Otherwise, if the instructions is in the map directly, it must be a load.
1549 // Remove it.
1550 auto toRemoveIt = NonLocalDefsCache.find(RemInst);
1551 if (toRemoveIt != NonLocalDefsCache.end()) {
1552 assert(isa<LoadInst>(RemInst) &&
1553 "only load instructions should be added directly");
1554 const Instruction *DepV = toRemoveIt->second.getResult().getInst();
1555 ReverseNonLocalDefsCache.find(DepV)->second.erase(RemInst);
1556 NonLocalDefsCache.erase(toRemoveIt);
1557 }
1558 }
1559
1560 // Loop over all of the things that depend on the instruction we're removing.
1562
1563 // If we find RemInst as a clobber or Def in any of the maps for other values,
1564 // we need to replace its entry with a dirty version of the instruction after
1565 // it. If RemInst is a terminator, we use a null dirty value.
1566 //
1567 // Using a dirty version of the instruction after RemInst saves having to scan
1568 // the entire block to get to this point.
1569 MemDepResult NewDirtyVal;
1570 if (!RemInst->isTerminator())
1571 NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
1572
1573 ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1574 if (ReverseDepIt != ReverseLocalDeps.end()) {
1575 // RemInst can't be the terminator if it has local stuff depending on it.
1576 assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
1577 "Nothing can locally depend on a terminator");
1578
1579 for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
1580 assert(InstDependingOnRemInst != RemInst &&
1581 "Already removed our local dep info");
1582
1583 LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1584
1585 // Make sure to remember that new things depend on NewDepInst.
1586 assert(NewDirtyVal.getInst() &&
1587 "There is no way something else can have "
1588 "a local dep on this if it is a terminator!");
1589 ReverseDepsToAdd.push_back(
1590 std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
1591 }
1592
1593 ReverseLocalDeps.erase(ReverseDepIt);
1594
1595 // Add new reverse deps after scanning the set, to avoid invalidating the
1596 // 'ReverseDeps' reference.
1597 while (!ReverseDepsToAdd.empty()) {
1598 ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
1599 ReverseDepsToAdd.back().second);
1600 ReverseDepsToAdd.pop_back();
1601 }
1602 }
1603
1604 ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1605 if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1606 for (Instruction *I : ReverseDepIt->second) {
1607 assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1608
1609 PerInstNLInfo &INLD = NonLocalDepsMap[I];
1610 // The information is now dirty!
1611 INLD.second = true;
1612
1613 for (auto &Entry : INLD.first) {
1614 if (Entry.getResult().getInst() != RemInst)
1615 continue;
1616
1617 // Convert to a dirty entry for the subsequent instruction.
1618 Entry.setResult(NewDirtyVal);
1619
1620 if (Instruction *NextI = NewDirtyVal.getInst())
1621 ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
1622 }
1623 }
1624
1625 ReverseNonLocalDeps.erase(ReverseDepIt);
1626
1627 // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
1628 while (!ReverseDepsToAdd.empty()) {
1629 ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
1630 ReverseDepsToAdd.back().second);
1631 ReverseDepsToAdd.pop_back();
1632 }
1633 }
1634
1635 // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1636 // value in the NonLocalPointerDeps info.
1637 ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1638 ReverseNonLocalPtrDeps.find(RemInst);
1639 if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1641 ReversePtrDepsToAdd;
1642
1643 for (ValueIsLoadPair P : ReversePtrDepIt->second) {
1644 assert(P.getPointer() != RemInst &&
1645 "Already removed NonLocalPointerDeps info for RemInst");
1646
1647 auto &NLPD = NonLocalPointerDeps[P];
1648
1649 NonLocalDepInfo &NLPDI = NLPD.NonLocalDeps;
1650
1651 // The cache is not valid for any specific block anymore.
1652 NLPD.Pair = BBSkipFirstBlockPair();
1653
1654 // Update any entries for RemInst to use the instruction after it.
1655 for (auto &Entry : NLPDI) {
1656 if (Entry.getResult().getInst() != RemInst)
1657 continue;
1658
1659 // Convert to a dirty entry for the subsequent instruction.
1660 Entry.setResult(NewDirtyVal);
1661
1662 if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1663 ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1664 }
1665
1666 // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
1667 // subsequent value may invalidate the sortedness.
1668 llvm::sort(NLPDI);
1669 }
1670
1671 ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1672
1673 while (!ReversePtrDepsToAdd.empty()) {
1674 ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
1675 ReversePtrDepsToAdd.back().second);
1676 ReversePtrDepsToAdd.pop_back();
1677 }
1678 }
1679
1680 assert(!NonLocalDepsMap.count(RemInst) && "RemInst got reinserted?");
1681 LLVM_DEBUG(verifyRemoved(RemInst));
1682}
1683
1684/// Verify that the specified instruction does not occur in our internal data
1685/// structures.
1686///
1687/// This function verifies by asserting in debug builds.
1688void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
1689#ifndef NDEBUG
1690 for (const auto &DepKV : LocalDeps) {
1691 assert(DepKV.first != D && "Inst occurs in data structures");
1692 assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1693 }
1694
1695 for (const auto &DepKV : NonLocalPointerDeps) {
1696 assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1697 for (const auto &Entry : DepKV.second.NonLocalDeps)
1698 assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1699 }
1700
1701 for (const auto &DepKV : NonLocalDepsMap) {
1702 assert(DepKV.first != D && "Inst occurs in data structures");
1703 const PerInstNLInfo &INLD = DepKV.second;
1704 for (const auto &Entry : INLD.first)
1705 assert(Entry.getResult().getInst() != D &&
1706 "Inst occurs in data structures");
1707 }
1708
1709 for (const auto &DepKV : ReverseLocalDeps) {
1710 assert(DepKV.first != D && "Inst occurs in data structures");
1711 for (Instruction *Inst : DepKV.second)
1712 assert(Inst != D && "Inst occurs in data structures");
1713 }
1714
1715 for (const auto &DepKV : ReverseNonLocalDeps) {
1716 assert(DepKV.first != D && "Inst occurs in data structures");
1717 for (Instruction *Inst : DepKV.second)
1718 assert(Inst != D && "Inst occurs in data structures");
1719 }
1720
1721 for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1722 assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1723
1724 for (ValueIsLoadPair P : DepKV.second)
1725 assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1726 "Inst occurs in ReverseNonLocalPtrDeps map");
1727 }
1728#endif
1729}
1730
1731AnalysisKey MemoryDependenceAnalysis::Key;
1732
1735
1738 auto &AA = AM.getResult<AAManager>(F);
1739 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1740 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1741 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1742 return MemoryDependenceResults(AA, AC, TLI, DT, DefaultBlockScanLimit);
1743}
1744
1746
1748 "Memory Dependence Analysis", false, true)
1754 "Memory Dependence Analysis", false, true)
1755
1757
1759
1761 MemDep.reset();
1762}
1763
1771
1773 FunctionAnalysisManager::Invalidator &Inv) {
1774 // Check whether our analysis is preserved.
1775 auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1776 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1777 // If not, give up now.
1778 return true;
1779
1780 // Check whether the analyses we depend on became invalid for any reason.
1781 if (Inv.invalidate<AAManager>(F, PA) ||
1782 Inv.invalidate<AssumptionAnalysis>(F, PA) ||
1783 Inv.invalidate<DominatorTreeAnalysis>(F, PA))
1784 return true;
1785
1786 // Otherwise this analysis result remains valid.
1787 return false;
1788}
1789
1791 return DefaultBlockScanLimit;
1792}
1793
1795 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1796 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1797 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1798 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1799 MemDep.emplace(AA, AC, TLI, DT, BlockScanLimit);
1800 return false;
1801}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isLoad(int Opcode)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Atomic ordering constants.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file defines the DenseMap class.
Module.h This file contains the declarations for the Module class.
This defines the Use class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static const unsigned int NumResultsLimit
static cl::opt< unsigned > CacheGlobalLimit("memdep-cache-global-limit", cl::Hidden, cl::init(10000), cl::desc("The max number of entries allowed in a cache (default = 10000)"))
static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, const TargetLibraryInfo &TLI)
If the given instruction references a specific memory location, fill in Loc with the details,...
static cl::opt< unsigned > BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(200), cl::desc("The number of blocks to scan during memory " "dependency analysis (default = 200)"))
static void RemoveFromReverseMap(DenseMap< Instruction *, SmallPtrSet< KeyTy, 4 > > &ReverseMap, Instruction *Inst, KeyTy Val)
This is a helper function that removes Val from 'Inst's set in ReverseMap.
static void SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache, unsigned NumSortedEntries)
Sort the NonLocalDepInfo cache, given a certain number of elements in the array that are already prop...
static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache, int Count=-1)
This method is used when -debug is specified to verify that cache arrays are properly kept sorted.
static bool canSkipClobberingStore(const StoreInst *SI, const MemoryLocation &MemLoc, Align MemLocAlign, BatchAAResults &BatchAA, unsigned ScanLimit)
static cl::opt< unsigned > BlockScanLimit("memdep-block-scan-limit", cl::Hidden, cl::init(100), cl::desc("The number of instructions to scan in a block in memory " "dependency analysis (default = 100)"))
This file provides utility analysis objects describing memory locations.
static bool isOrdered(const Instruction *I)
This file contains the declarations for metadata subclasses.
static bool isInvariantLoad(const Instruction *I, const Value *Ptr, const bool IsKernelFn)
uint64_t IntrinsicInst * II
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
The possible results of an alias query.
@ NoAlias
The two locations do not alias at all.
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
This templated class represents "all analyses that operate over <aparticular IR unit>" (e....
Definition Analysis.h:50
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
AnalysisUsage & addRequiredTransitive()
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
unsigned getNumber() const
Definition BasicBlock.h:95
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, bool IgnoreLocals=false)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool erase(const KeyT &Val)
Definition DenseMap.h:330
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition DenseMap.h:74
bool empty() const
Definition DenseMap.h:109
iterator end()
Definition DenseMap.h:81
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:316
An instruction for ordering other memory operations.
FunctionPass(char &pid)
Definition Pass.h:316
const BasicBlock & getEntryBlock() const
Definition Function.h:809
unsigned getMaxBlockNumber() const
Return a value larger than the largest block number.
Definition Function.h:828
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI bool isIdenticalToWhenDefined(const Instruction *I, bool IntersectAttrs=false) const LLVM_READONLY
This is like isIdenticalTo, except that it ignores the SubclassOptionalData flags,...
bool isTerminator() const
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
LLVM_ABI bool isVolatile() const LLVM_READONLY
Return true if this instruction has a volatile memory access.
A wrapper class for inspecting calls to intrinsic functions.
An instruction for reading from memory.
Value * getPointerOperand()
bool hasValue() const
bool isScalable() const
TypeSize getValue() const
A memory dependence query can return one of three different answers.
bool isNonLocal() const
Tests if this MemDepResult represents a query that is transparent to the start of the block,...
static MemDepResult getNonLocal()
bool isNonFuncLocal() const
Tests if this MemDepResult represents a query that is transparent to the start of the function.
static MemDepResult getClobber(Instruction *Inst)
bool isDef() const
Tests if this MemDepResult represents a query that is an instruction definition dependency.
static MemDepResult getUnknown()
bool isLocal() const
Tests if this MemDepResult represents a valid local query (Clobber/Def).
bool isUnknown() const
Tests if this MemDepResult represents a query which cannot and/or will not be computed.
static MemDepResult getNonFuncLocal()
static MemDepResult getDef(Instruction *Inst)
get methods: These are static ctor methods for creating various MemDepResult kinds.
Instruction * getInst() const
If this is a normal dependency, returns the instruction that is depended on.
An analysis that produces MemoryDependenceResults for a function.
MemoryDependenceResults run(Function &F, FunctionAnalysisManager &AM)
Provides a lazy, caching interface for making common memory aliasing information queries,...
MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst, unsigned *Limit, BatchAAResults &BatchAA)
std::vector< NonLocalDepEntry > NonLocalDepInfo
void invalidateCachedPredecessors()
Clears the PredIteratorCache info.
void invalidateCachedPointerInfo(Value *Ptr)
Invalidates cached information about the specified pointer, because it may be too conservative in mem...
MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst=nullptr, unsigned *Limit=nullptr)
Returns the instruction on which a memory location depends.
void removeInstruction(Instruction *InstToRemove)
Removes an instruction from the dependence analysis, updating the dependence of instructions that pre...
MemDepResult getInvariantGroupPointerDependency(LoadInst *LI, BasicBlock *BB)
This analysis looks for other loads and stores with invariant.group metadata and the same pointer ope...
unsigned getDefaultBlockScanLimit() const
Some methods limit the number of instructions they will examine.
MemDepResult getDependency(Instruction *QueryInst)
Returns the instruction on which a memory operation depends.
const NonLocalDepInfo & getNonLocalCallDependency(CallBase *QueryCall)
Perform a full dependency query for the specified call, returning the set of blocks that the value is...
void getNonLocalPointerDependency(Instruction *QueryInst, SmallVectorImpl< NonLocalDepResult > &Result)
Perform a full dependency query for an access to the QueryInst's specified memory location,...
bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)
Handle invalidation in the new PM.
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDepnedenceResults instance.
bool runOnFunction(Function &) override
Pass Implementation stuff. This doesn't do any analysis eagerly.
void getAnalysisUsage(AnalysisUsage &AU) const override
Does not modify anything. It uses Value Numbering and Alias Analysis.
void releaseMemory() override
Clean up memory in between runs.
Representation for a specific memory location.
MemoryLocation getWithoutAATags() const
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
MemoryLocation getWithNewPtr(const Value *NewPtr) const
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
const Value * Ptr
The address of the start of the location.
static LLVM_ABI MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
This is an entry in the NonLocalDepInfo cache.
void setResult(const MemDepResult &R)
const MemDepResult & getResult() const
This is a result from a NonLocal dependence query.
PHITransAddr - An address value which tracks and handles phi translation.
LLVM_ABI Value * translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
translateValue - PHI translate the current address up the CFG from CurBB to Pred, updating our state ...
Value * getAddr() const
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition Analysis.h:275
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
This class represents the va_arg llvm instruction, which returns an argument of the specified type gi...
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:964
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:709
iterator_range< use_iterator > uses()
Definition Value.h:380
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
Abstract Attribute helper functions.
Definition Attributor.h:165
@ Entry
Definition COFF.h:862
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
initializer< Ty > init(const Ty &Val)
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
bool isStrongerThanUnordered(AtomicOrdering AO)
LLVM_ABI bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
bool isModOrRefSet(const ModRefInfo MRI)
Definition ModRef.h:43
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition ModRef.h:28
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ ModRef
The access may reference and may modify the value stored in memory.
Definition ModRef.h:36
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
@ NoModRef
The access neither references nor modifies the value stored in memory.
Definition ModRef.h:30
@ Other
Any other memory.
Definition ModRef.h:68
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
bool isNoModRef(const ModRefInfo MRI)
Definition ModRef.h:40
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29