MemoryDependenceAnalysis.cpp (LLVM 7.0.0svn)
1 //===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements an analysis that determines, for a given memory
11 // operation, what preceding memory operations it depends on. It builds on
12 // alias analysis information, and tries to provide a lazy, caching interface to
13 // a common kind of alias information query.
14 //
15 //===----------------------------------------------------------------------===//
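For orientation, a minimal sketch (not part of this file) of how a new-pass-manager client might consume the analysis. The pass name ExamplePass and the printing are illustrative assumptions; MemoryDependenceAnalysis, MemoryDependenceResults::getDependency and MemDepResult are the interfaces declared in llvm/Analysis/MemoryDependenceAnalysis.h.

// Illustrative only -- not part of MemoryDependenceAnalysis.cpp.
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical function pass that prints the local dependency of each load.
struct ExamplePass : PassInfoMixin<ExamplePass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
    for (BasicBlock &BB : F)
      for (Instruction &I : BB)
        if (auto *LI = dyn_cast<LoadInst>(&I)) {
          // Closest preceding instruction in the same block that LI depends
          // on; the result may also be non-local, non-func-local or unknown.
          MemDepResult Dep = MD.getDependency(LI);
          if (Dep.isDef() || Dep.isClobber())
            errs() << *LI << " depends on " << *Dep.getInst() << "\n";
        }
    return PreservedAnalyses::all();
  }
};

Non-local results are refined with the getNonLocal* queries implemented further down in this file.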
16 
17 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/AliasAnalysis.h"
24 #include "llvm/Analysis/AssumptionCache.h"
25 #include "llvm/Analysis/MemoryBuiltins.h"
26 #include "llvm/Analysis/MemoryLocation.h"
27 #include "llvm/Analysis/OrderedBasicBlock.h"
28 #include "llvm/Analysis/PHITransAddr.h"
29 #include "llvm/Analysis/TargetLibraryInfo.h"
30 #include "llvm/Analysis/ValueTracking.h"
31 #include "llvm/IR/Attributes.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/CallSite.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DataLayout.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/Dominators.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/IR/InstrTypes.h"
40 #include "llvm/IR/Instruction.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/LLVMContext.h"
44 #include "llvm/IR/Metadata.h"
45 #include "llvm/IR/Module.h"
46 #include "llvm/IR/PredIteratorCache.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/Use.h"
49 #include "llvm/IR/User.h"
50 #include "llvm/IR/Value.h"
51 #include "llvm/Pass.h"
52 #include "llvm/Support/AtomicOrdering.h"
53 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/CommandLine.h"
55 #include "llvm/Support/Compiler.h"
56 #include "llvm/Support/Debug.h"
57 #include "llvm/Support/MathExtras.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstdint>
61 #include <iterator>
62 #include <utility>
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "memdep"
67 
68 STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
69 STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
70 STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
71 
72 STATISTIC(NumCacheNonLocalPtr,
73  "Number of fully cached non-local ptr responses");
74 STATISTIC(NumCacheDirtyNonLocalPtr,
75  "Number of cached, but dirty, non-local ptr responses");
76 STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
77 STATISTIC(NumCacheCompleteNonLocalPtr,
78  "Number of block queries that were completely cached");
79 
80 // Limit for the number of instructions to scan in a block.
81 
82 static cl::opt<unsigned> BlockScanLimit(
83  "memdep-block-scan-limit", cl::Hidden, cl::init(100),
84  cl::desc("The number of instructions to scan in a block in memory "
85  "dependency analysis (default = 100)"));
86 
87 static cl::opt<unsigned>
88  BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(1000),
89  cl::desc("The number of blocks to scan during memory "
90  "dependency analysis (default = 1000)"));
91 
92 // Limit on the number of memdep results to process.
93 static const unsigned int NumResultsLimit = 100;
94 
95 /// This is a helper function that removes Val from 'Inst's set in ReverseMap.
96 ///
97 /// If the set becomes empty, remove Inst's entry.
98 template <typename KeyTy>
99 static void
100 RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
101  Instruction *Inst, KeyTy Val) {
102  typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
103  ReverseMap.find(Inst);
104  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
105  bool Found = InstIt->second.erase(Val);
106  assert(Found && "Invalid reverse map!");
107  (void)Found;
108  if (InstIt->second.empty())
109  ReverseMap.erase(InstIt);
110 }
111 
112 /// If the given instruction references a specific memory location, fill in Loc
113 /// with the details, otherwise set Loc.Ptr to null.
114 ///
115 /// Returns a ModRefInfo value describing the general behavior of the
116 /// instruction.
117 static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
118  const TargetLibraryInfo &TLI) {
119  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
120  if (LI->isUnordered()) {
121  Loc = MemoryLocation::get(LI);
122  return ModRefInfo::Ref;
123  }
124  if (LI->getOrdering() == AtomicOrdering::Monotonic) {
125  Loc = MemoryLocation::get(LI);
126  return ModRefInfo::ModRef;
127  }
128  Loc = MemoryLocation();
129  return ModRefInfo::ModRef;
130  }
131 
132  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
133  if (SI->isUnordered()) {
134  Loc = MemoryLocation::get(SI);
135  return ModRefInfo::Mod;
136  }
137  if (SI->getOrdering() == AtomicOrdering::Monotonic) {
138  Loc = MemoryLocation::get(SI);
139  return ModRefInfo::ModRef;
140  }
141  Loc = MemoryLocation();
142  return ModRefInfo::ModRef;
143  }
144 
145  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
146  Loc = MemoryLocation::get(V);
147  return ModRefInfo::ModRef;
148  }
149 
150  if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
151  // calls to free() deallocate the entire structure
152  Loc = MemoryLocation(CI->getArgOperand(0));
153  return ModRefInfo::Mod;
154  }
155 
156  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
157  switch (II->getIntrinsicID()) {
158  case Intrinsic::lifetime_start:
159  case Intrinsic::lifetime_end:
160  case Intrinsic::invariant_start:
161  Loc = MemoryLocation::getForArgument(II, 1, TLI);
162  // These intrinsics don't really modify the memory, but returning Mod
163  // will allow them to be handled conservatively.
164  return ModRefInfo::Mod;
165  case Intrinsic::invariant_end:
166  Loc = MemoryLocation::getForArgument(II, 2, TLI);
167  // These intrinsics don't really modify the memory, but returning Mod
168  // will allow them to be handled conservatively.
169  return ModRefInfo::Mod;
170  default:
171  break;
172  }
173  }
174 
175  // Otherwise, just do the coarse-grained thing that always works.
176  if (Inst->mayWriteToMemory())
177  return ModRefInfo::ModRef;
178  if (Inst->mayReadFromMemory())
179  return ModRefInfo::Ref;
180  return ModRefInfo::NoModRef;
181 }
182 
183 /// Private helper for finding the local dependencies of a call site.
184 MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
185  CallSite CS, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
186  BasicBlock *BB) {
187  unsigned Limit = BlockScanLimit;
188 
189  // Walk backwards through the block, looking for dependencies.
190  while (ScanIt != BB->begin()) {
191  Instruction *Inst = &*--ScanIt;
192  // Debug intrinsics don't cause dependences and should not affect Limit
193  if (isa<DbgInfoIntrinsic>(Inst))
194  continue;
195 
196  // Limit the amount of scanning we do so we don't end up with quadratic
197  // running time on extreme testcases.
198  --Limit;
199  if (!Limit)
200  return MemDepResult::getUnknown();
201 
202  // If this inst is a memory op, get the pointer it accessed
203  MemoryLocation Loc;
204  ModRefInfo MR = GetLocation(Inst, Loc, TLI);
205  if (Loc.Ptr) {
206  // A simple instruction.
207  if (isModOrRefSet(AA.getModRefInfo(CS, Loc)))
208  return MemDepResult::getClobber(Inst);
209  continue;
210  }
211 
212  if (auto InstCS = CallSite(Inst)) {
213  // If these two calls do not interfere, look past it.
214  if (isNoModRef(AA.getModRefInfo(CS, InstCS))) {
215  // If the two calls are the same, return InstCS as a Def, so that
216  // CS can be found redundant and eliminated.
217  if (isReadOnlyCall && !isModSet(MR) &&
218  CS.getInstruction()->isIdenticalToWhenDefined(Inst))
219  return MemDepResult::getDef(Inst);
220 
221  // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
222  // keep scanning.
223  continue;
224  } else
225  return MemDepResult::getClobber(Inst);
226  }
227 
228  // If we could not obtain a pointer for the instruction and the instruction
229  // touches memory then assume that this is a dependency.
230  if (isModOrRefSet(MR))
231  return MemDepResult::getClobber(Inst);
232  }
233 
234  // No dependence found. If this is the entry block of the function, it is
235  // unknown, otherwise it is non-local.
236  if (BB != &BB->getParent()->getEntryBlock())
237  return MemDepResult::getNonLocal();
238  return MemDepResult::getNonFuncLocal();
239 }
240 
241 unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
242  const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
243  const LoadInst *LI) {
244  // We can only extend simple integer loads.
245  if (!isa<IntegerType>(LI->getType()) || !LI->isSimple())
246  return 0;
247 
248  // Load widening is hostile to ThreadSanitizer: it may cause false positives
249  // or make the reports more cryptic (access sizes are wrong).
250  if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
251  return 0;
252 
253  const DataLayout &DL = LI->getModule()->getDataLayout();
254 
255  // Get the base of this load.
256  int64_t LIOffs = 0;
257  const Value *LIBase =
258  GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
259 
260  // If the two pointers are not based on the same pointer, we can't tell that
261  // they are related.
262  if (LIBase != MemLocBase)
263  return 0;
264 
265  // Okay, the two values are based on the same pointer, but returned as
266  // no-alias. This happens when we have things like two byte loads at "P+1"
267  // and "P+3". Check to see if increasing the size of the "LI" load up to its
268  // alignment (or the largest native integer type) will allow us to load all
269  // the bits required by MemLoc.
270 
271  // If MemLoc is before LI, then no widening of LI will help us out.
272  if (MemLocOffs < LIOffs)
273  return 0;
274 
275  // Get the alignment of the load in bytes. We assume that it is safe to load
276  // any legal integer up to this size without a problem. For example, if we're
277  // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
278  // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
279  // to i16.
280  unsigned LoadAlign = LI->getAlignment();
281 
282  int64_t MemLocEnd = MemLocOffs + MemLocSize;
283 
284  // If no amount of rounding up will let MemLoc fit into LI, then bail out.
285  if (LIOffs + LoadAlign < MemLocEnd)
286  return 0;
287 
288  // This is the size of the load to try. Start with the next larger power of
289  // two.
290  unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
291  NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
292 
293  while (true) {
294  // If this load size is bigger than our known alignment or would not fit
295  // into a native integer register, then we fail.
296  if (NewLoadByteSize > LoadAlign ||
297  !DL.fitsInLegalInteger(NewLoadByteSize * 8))
298  return 0;
299 
300  if (LIOffs + NewLoadByteSize > MemLocEnd &&
301  (LI->getParent()->getParent()->hasFnAttribute(
302  Attribute::SanitizeAddress) ||
303  LI->getParent()->getParent()->hasFnAttribute(
304  Attribute::SanitizeHWAddress)))
305  // We will be reading past the location accessed by the original program.
306  // While this is safe in a regular build, Address Safety analysis tools
307  // may start reporting false warnings. So, don't do widening.
308  return 0;
309 
310  // If a load of this width would include all of MemLoc, then we succeed.
311  if (LIOffs + NewLoadByteSize >= MemLocEnd)
312  return NewLoadByteSize;
313 
314  NewLoadByteSize <<= 1;
315  }
316 }
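As a worked example with purely illustrative numbers (assuming an x86-like target where i16 and i32 are legal integers and no sanitizers are enabled): let LI be a simple i8 load of P with 4-byte alignment, so LIOffs = 0 and LoadAlign = 4, and let MemLoc cover the single byte at P+3, so MemLocOffs = 3, MemLocSize = 1 and MemLocEnd = 4. The candidate width starts at NextPowerOf2(1) = 2 bytes, which does not reach MemLocEnd, so it is doubled to 4; four bytes is within the alignment, fits a legal i32, and satisfies LIOffs + 4 >= MemLocEnd, so the function returns 4: widening the i8 load to an i32 load would cover both locations.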
317 
318 static bool isVolatile(Instruction *Inst) {
319  if (auto *LI = dyn_cast<LoadInst>(Inst))
320  return LI->isVolatile();
321  if (auto *SI = dyn_cast<StoreInst>(Inst))
322  return SI->isVolatile();
323  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
324  return AI->isVolatile();
325  return false;
326 }
327 
328 MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
329  const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
330  BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
331  MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
332  if (QueryInst != nullptr) {
333  if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
334  InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
335 
336  if (InvariantGroupDependency.isDef())
337  return InvariantGroupDependency;
338  }
339  }
340  MemDepResult SimpleDep = getSimplePointerDependencyFrom(
341  MemLoc, isLoad, ScanIt, BB, QueryInst, Limit);
342  if (SimpleDep.isDef())
343  return SimpleDep;
344  // Non-local invariant group dependency indicates there is non local Def
345  // (it only returns nonLocal if it finds nonLocal def), which is better than
346  // local clobber and everything else.
347  if (InvariantGroupDependency.isNonLocal())
348  return InvariantGroupDependency;
349 
350  assert(InvariantGroupDependency.isUnknown() &&
351  "InvariantGroupDependency should be only unknown at this point");
352  return SimpleDep;
353 }
354 
355 MemDepResult
356 MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
357  BasicBlock *BB) {
358 
359  if (!LI->getMetadata(LLVMContext::MD_invariant_group))
360  return MemDepResult::getUnknown();
361 
362  // Take the ptr operand after all casts and geps 0. This way we can search
363  // cast graph down only.
364  Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
365 
366  // It is not safe to walk the use list of a global value, because function
367  // passes aren't allowed to look outside their functions.
368  // FIXME: this could be fixed by filtering instructions from outside
369  // of current function.
370  if (isa<GlobalValue>(LoadOperand))
371  return MemDepResult::getUnknown();
372 
373  // Queue to process all pointers that are equivalent to load operand.
374  SmallVector<const Value *, 8> LoadOperandsQueue;
375  LoadOperandsQueue.push_back(LoadOperand);
376 
377  Instruction *ClosestDependency = nullptr;
378  // Order of instructions in the uses list is unpredictable. In order to always
379  // get the same result, we will look for the closest dominance.
380  auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
381  assert(Other && "Must call it with not null instruction");
382  if (Best == nullptr || DT.dominates(Best, Other))
383  return Other;
384  return Best;
385  };
386 
387  // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
388  // we will see all the instructions. This should be fixed in MSSA.
389  while (!LoadOperandsQueue.empty()) {
390  const Value *Ptr = LoadOperandsQueue.pop_back_val();
391  assert(Ptr && !isa<GlobalValue>(Ptr) &&
392  "Null or GlobalValue should not be inserted");
393 
394  for (const Use &Us : Ptr->uses()) {
395  auto *U = dyn_cast<Instruction>(Us.getUser());
396  if (!U || U == LI || !DT.dominates(U, LI))
397  continue;
398 
399  // Bitcast or gep with zeros are using Ptr. Add to queue to check its
400  // users. U = bitcast Ptr
401  if (isa<BitCastInst>(U)) {
402  LoadOperandsQueue.push_back(U);
403  continue;
404  }
405  // Gep with zeros is equivalent to bitcast.
406  // FIXME: we are not sure if some bitcast should be canonicalized to gep 0
407  // or gep 0 to bitcast because of SROA, so there are 2 forms. When
408  // typeless pointers will be ready then both cases will be gone
409  // (and this BFS also won't be needed).
410  if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
411  if (GEP->hasAllZeroIndices()) {
412  LoadOperandsQueue.push_back(U);
413  continue;
414  }
415 
416  // If we hit load/store with the same invariant.group metadata (and the
417  // same pointer operand) we can assume that value pointed by pointer
418  // operand didn't change.
419  if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
420  U->getMetadata(LLVMContext::MD_invariant_group) != nullptr)
421  ClosestDependency = GetClosestDependency(ClosestDependency, U);
422  }
423  }
424 
425  if (!ClosestDependency)
426  return MemDepResult::getUnknown();
427  if (ClosestDependency->getParent() == BB)
428  return MemDepResult::getDef(ClosestDependency);
429  // Def(U) can't be returned here because it is non-local. If local
430  // dependency won't be found then return nonLocal counting that the
431  // user will call getNonLocalPointerDependency, which will return cached
432  // result.
433  NonLocalDefsCache.try_emplace(
434  LI, NonLocalDepResult(ClosestDependency->getParent(),
435  MemDepResult::getDef(ClosestDependency), nullptr));
436  ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
437  return MemDepResult::getNonLocal();
438 }
439 
440 MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
441  const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
442  BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
443  bool isInvariantLoad = false;
444 
445  if (!Limit) {
446  unsigned DefaultLimit = BlockScanLimit;
447  return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
448  &DefaultLimit);
449  }
450 
451  // We must be careful with atomic accesses, as they may allow another thread
452  // to touch this location, clobbering it. We are conservative: if the
453  // QueryInst is not a simple (non-atomic) memory access, we automatically
454  // return getClobber.
455  // If it is simple, we know based on the results of
456  // "Compiler testing via a theory of sound optimisations in the C11/C++11
457  // memory model" in PLDI 2013, that a non-atomic location can only be
458  // clobbered between a pair of a release and an acquire action, with no
459  // access to the location in between.
460  // Here is an example for giving the general intuition behind this rule.
461  // In the following code:
462  // store x 0;
463  // release action; [1]
464  // acquire action; [4]
465  // %val = load x;
466  // It is unsafe to replace %val by 0 because another thread may be running:
467  // acquire action; [2]
468  // store x 42;
469  // release action; [3]
470  // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
471  // being 42. A key property of this program however is that if either
472  // 1 or 4 were missing, there would be a race between the store of 42 and
473  // either the store of 0 or the load (making the whole program racy).
474  // The paper mentioned above shows that the same property is respected
475  // by every program that can detect any optimization of that kind: either
476  // it is racy (undefined) or there is a release followed by an acquire
477  // between the pair of accesses under consideration.
478 
479  // If the load is invariant, we "know" that it doesn't alias *any* write. We
480  // do want to respect mustalias results since defs are useful for value
481  // forwarding, but any mayalias write can be assumed to be noalias.
482  // Arguably, this logic should be pushed inside AliasAnalysis itself.
483  if (isLoad && QueryInst) {
484  LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
485  if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
486  isInvariantLoad = true;
487  }
488 
489  const DataLayout &DL = BB->getModule()->getDataLayout();
490 
491  // Create a numbered basic block to lazily compute and cache instruction
492  // positions inside a BB. This is used to provide fast queries for relative
493  // position between two instructions in a BB and can be used by
494  // AliasAnalysis::callCapturesBefore.
495  OrderedBasicBlock OBB(BB);
496 
497  // Return "true" if and only if the instruction I is either a non-simple
498  // load or a non-simple store.
499  auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool {
500  if (auto *LI = dyn_cast<LoadInst>(I))
501  return !LI->isSimple();
502  if (auto *SI = dyn_cast<StoreInst>(I))
503  return !SI->isSimple();
504  return false;
505  };
506 
507  // Return "true" if I is not a load and not a store, but it does access
508  // memory.
509  auto isOtherMemAccess = [](Instruction *I) -> bool {
510  return !isa<LoadInst>(I) && !isa<StoreInst>(I) && I->mayReadOrWriteMemory();
511  };
512 
513  // Walk backwards through the basic block, looking for dependencies.
514  while (ScanIt != BB->begin()) {
515  Instruction *Inst = &*--ScanIt;
516 
517  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
518  // Debug intrinsics don't (and can't) cause dependencies.
519  if (isa<DbgInfoIntrinsic>(II))
520  continue;
521 
522  // Limit the amount of scanning we do so we don't end up with quadratic
523  // running time on extreme testcases.
524  --*Limit;
525  if (!*Limit)
526  return MemDepResult::getUnknown();
527 
528  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
529  // If we reach a lifetime begin or end marker, then the query ends here
530  // because the value is undefined.
531  if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
532  // FIXME: This only considers queries directly on the invariant-tagged
533  // pointer, not on query pointers that are indexed off of them. It'd
534  // be nice to handle that at some point (the right approach is to use
535  // GetPointerBaseWithConstantOffset).
536  if (AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
537  return MemDepResult::getDef(II);
538  continue;
539  }
540  }
541 
542  // Values depend on loads if the pointers are must aliased. This means
543  // that a load depends on another must aliased load from the same value.
544  // One exception is atomic loads: a value can depend on an atomic load that
545  // it does not alias with when this atomic load indicates that another
546  // thread may be accessing the location.
547  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
548  // While volatile accesses cannot be eliminated, they do not have to clobber
549  // non-aliasing locations, as normal accesses, for example, can be safely
550  // reordered with volatile accesses.
551  if (LI->isVolatile()) {
552  if (!QueryInst)
553  // Original QueryInst *may* be volatile
554  return MemDepResult::getClobber(LI);
555  if (isVolatile(QueryInst))
556  // Ordering required if QueryInst is itself volatile
557  return MemDepResult::getClobber(LI);
558  // Otherwise, volatile doesn't imply any special ordering
559  }
560 
561  // Atomic loads have complications involved.
562  // A Monotonic (or higher) load is OK if the query inst is itself not
563  // atomic.
564  // FIXME: This is overly conservative.
565  if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
566  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
567  isOtherMemAccess(QueryInst))
568  return MemDepResult::getClobber(LI);
569  if (LI->getOrdering() != AtomicOrdering::Monotonic)
570  return MemDepResult::getClobber(LI);
571  }
572 
573  MemoryLocation LoadLoc = MemoryLocation::get(LI);
574 
575  // If we found a pointer, check if it could be the same as our pointer.
576  AliasResult R = AA.alias(LoadLoc, MemLoc);
577 
578  if (isLoad) {
579  if (R == NoAlias)
580  continue;
581 
582  // Must aliased loads are defs of each other.
583  if (R == MustAlias)
584  return MemDepResult::getDef(Inst);
585 
586 #if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
587  // in terms of clobbering loads, but since it does this by looking
588  // at the clobbering load directly, it doesn't know about any
589  // phi translation that may have happened along the way.
590 
591  // If we have a partial alias, then return this as a clobber for the
592  // client to handle.
593  if (R == PartialAlias)
594  return MemDepResult::getClobber(Inst);
595 #endif
596 
597  // Random may-alias loads don't depend on each other without a
598  // dependence.
599  continue;
600  }
601 
602  // Stores don't depend on other no-aliased accesses.
603  if (R == NoAlias)
604  continue;
605 
606  // Stores don't alias loads from read-only memory.
607  if (AA.pointsToConstantMemory(LoadLoc))
608  continue;
609 
610  // Stores depend on may/must aliased loads.
611  return MemDepResult::getDef(Inst);
612  }
613 
614  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
615  // Atomic stores have complications involved.
616  // A Monotonic store is OK if the query inst is itself not atomic.
617  // FIXME: This is overly conservative.
618  if (!SI->isUnordered() && SI->isAtomic()) {
619  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
620  isOtherMemAccess(QueryInst))
621  return MemDepResult::getClobber(SI);
622  if (SI->getOrdering() != AtomicOrdering::Monotonic)
623  return MemDepResult::getClobber(SI);
624  }
625 
626  // FIXME: this is overly conservative.
627  // While volatile accesses cannot be eliminated, they do not have to clobber
628  // non-aliasing locations, as normal accesses can for example be reordered
629  // with volatile accesses.
630  if (SI->isVolatile())
631  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
632  isOtherMemAccess(QueryInst))
633  return MemDepResult::getClobber(SI);
634 
635  // If alias analysis can tell that this store is guaranteed to not modify
636  // the query pointer, ignore it. Use getModRefInfo to handle cases where
637  // the query pointer points to constant memory etc.
638  if (!isModOrRefSet(AA.getModRefInfo(SI, MemLoc)))
639  continue;
640 
641  // Ok, this store might clobber the query pointer. Check to see if it is
642  // a must alias: in this case, we want to return this as a def.
643  // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
644  MemoryLocation StoreLoc = MemoryLocation::get(SI);
645 
646  // If we found a pointer, check if it could be the same as our pointer.
647  AliasResult R = AA.alias(StoreLoc, MemLoc);
648 
649  if (R == NoAlias)
650  continue;
651  if (R == MustAlias)
652  return MemDepResult::getDef(Inst);
653  if (isInvariantLoad)
654  continue;
655  return MemDepResult::getClobber(Inst);
656  }
657 
658  // If this is an allocation, and if we know that the accessed pointer is to
659  // the allocation, return Def. This means that there is no dependence and
660  // the access can be optimized based on that. For example, a load could
661  // turn into undef. Note that we can bypass the allocation itself when
662  // looking for a clobber in many cases; that's an alias property and is
663  // handled by BasicAA.
664  if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, &TLI)) {
665  const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
666  if (AccessPtr == Inst || AA.isMustAlias(Inst, AccessPtr))
667  return MemDepResult::getDef(Inst);
668  }
669 
670  if (isInvariantLoad)
671  continue;
672 
673  // A release fence requires that all stores complete before it, but does
674  // not prevent the reordering of following loads or stores 'before' the
675  // fence. As a result, we look past it when finding a dependency for
676  // loads. DSE uses this to find preceding stores to delete and thus we
677  // can't bypass the fence if the query instruction is a store.
678  if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
679  if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
680  continue;
681 
682  // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
683  ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc);
684  // If necessary, perform additional analysis.
685  if (isModAndRefSet(MR))
686  MR = AA.callCapturesBefore(Inst, MemLoc, &DT, &OBB);
687  switch (clearMust(MR)) {
688  case ModRefInfo::NoModRef:
689  // If the call has no effect on the queried pointer, just ignore it.
690  continue;
691  case ModRefInfo::Mod:
692  return MemDepResult::getClobber(Inst);
693  case ModRefInfo::Ref:
694  // If the call is known to never store to the pointer, and if this is a
695  // load query, we can safely ignore it (scan past it).
696  if (isLoad)
697  continue;
698  LLVM_FALLTHROUGH;
699  default:
700  // Otherwise, there is a potential dependence. Return a clobber.
701  return MemDepResult::getClobber(Inst);
702  }
703  }
704 
705  // No dependence found. If this is the entry block of the function, it is
706  // unknown, otherwise it is non-local.
707  if (BB != &BB->getParent()->getEntryBlock())
708  return MemDepResult::getNonLocal();
709  return MemDepResult::getNonFuncLocal();
710 }
711 
712 MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
713  Instruction *ScanPos = QueryInst;
714 
715  // Check for a cached result
716  MemDepResult &LocalCache = LocalDeps[QueryInst];
717 
718  // If the cached entry is non-dirty, just return it. Note that this depends
719  // on MemDepResult's default constructing to 'dirty'.
720  if (!LocalCache.isDirty())
721  return LocalCache;
722 
723  // Otherwise, if we have a dirty entry, we know we can start the scan at that
724  // instruction, which may save us some work.
725  if (Instruction *Inst = LocalCache.getInst()) {
726  ScanPos = Inst;
727 
728  RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
729  }
730 
731  BasicBlock *QueryParent = QueryInst->getParent();
732 
733  // Do the scan.
734  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
735  // No dependence found. If this is the entry block of the function, it is
736  // unknown, otherwise it is non-local.
737  if (QueryParent != &QueryParent->getParent()->getEntryBlock())
738  LocalCache = MemDepResult::getNonLocal();
739  else
740  LocalCache = MemDepResult::getNonFuncLocal();
741  } else {
742  MemoryLocation MemLoc;
743  ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
744  if (MemLoc.Ptr) {
745  // If we can do a pointer scan, make it happen.
746  bool isLoad = !isModSet(MR);
747  if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
748  isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
749 
750  LocalCache = getPointerDependencyFrom(
751  MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
752  } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
753  CallSite QueryCS(QueryInst);
754  bool isReadOnly = AA.onlyReadsMemory(QueryCS);
755  LocalCache = getCallSiteDependencyFrom(
756  QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent);
757  } else
758  // Non-memory instruction.
759  LocalCache = MemDepResult::getUnknown();
760  }
761 
762  // Remember the result!
763  if (Instruction *I = LocalCache.getInst())
764  ReverseLocalDeps[I].insert(QueryInst);
765 
766  return LocalCache;
767 }
768 
769 #ifndef NDEBUG
770 /// This method is used when -debug is specified to verify that cache arrays
771 /// are properly kept sorted.
772 static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
773  int Count = -1) {
774  if (Count == -1)
775  Count = Cache.size();
776  assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
777  "Cache isn't sorted!");
778 }
779 #endif
780 
781 const MemoryDependenceResults::NonLocalDepInfo &
782 MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
783  assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
784  "getNonLocalCallDependency should only be used on calls with "
785  "non-local deps!");
786  PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
787  NonLocalDepInfo &Cache = CacheP.first;
788 
789  // This is the set of blocks that need to be recomputed. In the cached case,
790  // this can happen due to instructions being deleted etc. In the uncached
791  // case, this starts out as the set of predecessors we care about.
792  SmallVector<BasicBlock *, 32> DirtyBlocks;
793 
794  if (!Cache.empty()) {
795  // Okay, we have a cache entry. If we know it is not dirty, just return it
796  // with no computation.
797  if (!CacheP.second) {
798  ++NumCacheNonLocal;
799  return Cache;
800  }
801 
802  // If we already have a partially computed set of results, scan them to
803  // determine what is dirty, seeding our initial DirtyBlocks worklist.
804  for (auto &Entry : Cache)
805  if (Entry.getResult().isDirty())
806  DirtyBlocks.push_back(Entry.getBB());
807 
808  // Sort the cache so that we can do fast binary search lookups below.
809  llvm::sort(Cache.begin(), Cache.end());
810 
811  ++NumCacheDirtyNonLocal;
812  // cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
813  // << Cache.size() << " cached: " << *QueryInst;
814  } else {
815  // Seed DirtyBlocks with each of the preds of QueryInst's block.
816  BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
817  for (BasicBlock *Pred : PredCache.get(QueryBB))
818  DirtyBlocks.push_back(Pred);
819  ++NumUncacheNonLocal;
820  }
821 
822  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
823  bool isReadonlyCall = AA.onlyReadsMemory(QueryCS);
824 
825  SmallPtrSet<BasicBlock *, 32> Visited;
826 
827  unsigned NumSortedEntries = Cache.size();
828  LLVM_DEBUG(AssertSorted(Cache));
829 
830  // Iterate while we still have blocks to update.
831  while (!DirtyBlocks.empty()) {
832  BasicBlock *DirtyBB = DirtyBlocks.back();
833  DirtyBlocks.pop_back();
834 
835  // Already processed this block?
836  if (!Visited.insert(DirtyBB).second)
837  continue;
838 
839  // Do a binary search to see if we already have an entry for this block in
840  // the cache set. If so, find it.
841  LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
842  NonLocalDepInfo::iterator Entry =
843  std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
844  NonLocalDepEntry(DirtyBB));
845  if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
846  --Entry;
847 
848  NonLocalDepEntry *ExistingResult = nullptr;
849  if (Entry != Cache.begin() + NumSortedEntries &&
850  Entry->getBB() == DirtyBB) {
851  // If we already have an entry, and if it isn't already dirty, the block
852  // is done.
853  if (!Entry->getResult().isDirty())
854  continue;
855 
856  // Otherwise, remember this slot so we can update the value.
857  ExistingResult = &*Entry;
858  }
859 
860  // If the dirty entry has a pointer, start scanning from it so we don't have
861  // to rescan the entire block.
862  BasicBlock::iterator ScanPos = DirtyBB->end();
863  if (ExistingResult) {
864  if (Instruction *Inst = ExistingResult->getResult().getInst()) {
865  ScanPos = Inst->getIterator();
866  // We're removing QueryInst's use of Inst.
867  RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
868  QueryCS.getInstruction());
869  }
870  }
871 
872  // Find out if this block has a local dependency for QueryInst.
873  MemDepResult Dep;
874 
875  if (ScanPos != DirtyBB->begin()) {
876  Dep =
877  getCallSiteDependencyFrom(QueryCS, isReadonlyCall, ScanPos, DirtyBB);
878  } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
879  // No dependence found. If this is the entry block of the function, it is
880  // a clobber, otherwise it is unknown.
881  Dep = MemDepResult::getNonLocal();
882  } else {
883  Dep = MemDepResult::getNonFuncLocal();
884  }
885 
886  // If we had a dirty entry for the block, update it. Otherwise, just add
887  // a new entry.
888  if (ExistingResult)
889  ExistingResult->setResult(Dep);
890  else
891  Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
892 
893  // If the block has a dependency (i.e. it isn't completely transparent to
894  // the value), remember the association!
895  if (!Dep.isNonLocal()) {
896  // Keep the ReverseNonLocalDeps map up to date so we can efficiently
897  // update this when we remove instructions.
898  if (Instruction *Inst = Dep.getInst())
899  ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
900  } else {
901 
902  // If the block *is* completely transparent to the load, we need to check
903  // the predecessors of this block. Add them to our worklist.
904  for (BasicBlock *Pred : PredCache.get(DirtyBB))
905  DirtyBlocks.push_back(Pred);
906  }
907  }
908 
909  return Cache;
910 }
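A minimal usage sketch (not part of this file), assuming MD is the MemoryDependenceResults for the enclosing function and Call is a call or invoke instruction; the helper name dumpNonLocalCallDeps is hypothetical.

// Illustrative only -- walks the per-block results for a call whose local
// dependency was already found to be non-local.
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/raw_ostream.h"

static void dumpNonLocalCallDeps(llvm::MemoryDependenceResults &MD,
                                 llvm::Instruction *Call) {
  using namespace llvm;
  // getNonLocalCallDependency may only be used when the local query says
  // "non-local" (see the assert at the top of that function).
  if (!MD.getDependency(Call).isNonLocal())
    return;
  for (const NonLocalDepEntry &Entry :
       MD.getNonLocalCallDependency(CallSite(Call)))
    if (Instruction *DepInst = Entry.getResult().getInst())
      errs() << Entry.getBB()->getName() << ": " << *DepInst << "\n";
}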
911 
912 void MemoryDependenceResults::getNonLocalPointerDependency(
913  Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
914  const MemoryLocation Loc = MemoryLocation::get(QueryInst);
915  bool isLoad = isa<LoadInst>(QueryInst);
916  BasicBlock *FromBB = QueryInst->getParent();
917  assert(FromBB);
918 
919  assert(Loc.Ptr->getType()->isPointerTy() &&
920  "Can't get pointer deps of a non-pointer!");
921  Result.clear();
922  {
923  // Check if there is cached Def with invariant.group.
924  auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
925  if (NonLocalDefIt != NonLocalDefsCache.end()) {
926  Result.push_back(NonLocalDefIt->second);
927  ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
928  .erase(QueryInst);
929  NonLocalDefsCache.erase(NonLocalDefIt);
930  return;
931  }
932  }
933  // This routine does not expect to deal with volatile instructions.
934  // Doing so would require piping through the QueryInst all the way through.
935  // TODO: volatiles can't be elided, but they can be reordered with other
936  // non-volatile accesses.
937 
938  // We currently give up on any instruction which is ordered, but we do handle
939  // atomic instructions which are unordered.
940  // TODO: Handle ordered instructions
941  auto isOrdered = [](Instruction *Inst) {
942  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
943  return !LI->isUnordered();
944  } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
945  return !SI->isUnordered();
946  }
947  return false;
948  };
949  if (isVolatile(QueryInst) || isOrdered(QueryInst)) {
950  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
951  const_cast<Value *>(Loc.Ptr)));
952  return;
953  }
954  const DataLayout &DL = FromBB->getModule()->getDataLayout();
955  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
956 
957  // This is the set of blocks we've inspected, and the pointer we consider in
958  // each block. Because of critical edges, we currently bail out if querying
959  // a block with multiple different pointers. This can happen during PHI
960  // translation.
961  DenseMap<BasicBlock *, Value *> Visited;
962  if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
963  Result, Visited, true))
964  return;
965  Result.clear();
966  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
967  const_cast<Value *>(Loc.Ptr)));
968 }
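Similarly, a minimal sketch (not part of this file) of the non-local pointer query for a simple load; MD and LI are assumed to come from the surrounding pass, and dumpNonLocalLoadDeps is a hypothetical helper.

// Illustrative only -- collects the per-block dependencies of a load.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/raw_ostream.h"

static void dumpNonLocalLoadDeps(llvm::MemoryDependenceResults &MD,
                                 llvm::LoadInst *LI) {
  using namespace llvm;
  // Only meaningful when the local query result was non-local.
  if (!MD.getDependency(LI).isNonLocal())
    return;
  SmallVector<NonLocalDepResult, 8> Deps;
  MD.getNonLocalPointerDependency(LI, Deps);
  for (const NonLocalDepResult &NLDR : Deps) {
    const MemDepResult &Res = NLDR.getResult();
    errs() << NLDR.getBB()->getName() << ": "
           << (Res.isDef() ? "def" : Res.isClobber() ? "clobber" : "other")
           << "\n";
  }
}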
969 
970 /// Compute the memdep value for BB with Pointer/PointeeSize using either
971 /// cached information in Cache or by doing a lookup (which may use dirty cache
972 /// info if available).
973 ///
974 /// If we do a lookup, add the result to the cache.
975 MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
976  Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
977  BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
978 
979  // Do a binary search to see if we already have an entry for this block in
980  // the cache set. If so, find it.
981  NonLocalDepInfo::iterator Entry = std::upper_bound(
982  Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
983  if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
984  --Entry;
985 
986  NonLocalDepEntry *ExistingResult = nullptr;
987  if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
988  ExistingResult = &*Entry;
989 
990  // If we have a cached entry, and it is non-dirty, use it as the value for
991  // this dependency.
992  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
993  ++NumCacheNonLocalPtr;
994  return ExistingResult->getResult();
995  }
996 
997  // Otherwise, we have to scan for the value. If we have a dirty cache
998  // entry, start scanning from its position, otherwise we scan from the end
999  // of the block.
1000  BasicBlock::iterator ScanPos = BB->end();
1001  if (ExistingResult && ExistingResult->getResult().getInst()) {
1002  assert(ExistingResult->getResult().getInst()->getParent() == BB &&
1003  "Instruction invalidated?");
1004  ++NumCacheDirtyNonLocalPtr;
1005  ScanPos = ExistingResult->getResult().getInst()->getIterator();
1006 
1007  // Eliminating the dirty entry from 'Cache', so update the reverse info.
1008  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1009  RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
1010  } else {
1011  ++NumUncacheNonLocalPtr;
1012  }
1013 
1014  // Scan the block for the dependency.
1015  MemDepResult Dep =
1016  getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst);
1017 
1018  // If we had a dirty entry for the block, update it. Otherwise, just add
1019  // a new entry.
1020  if (ExistingResult)
1021  ExistingResult->setResult(Dep);
1022  else
1023  Cache->push_back(NonLocalDepEntry(BB, Dep));
1024 
1025  // If the block has a dependency (i.e. it isn't completely transparent to
1026  // the value), remember the reverse association because we just added it
1027  // to Cache!
1028  if (!Dep.isDef() && !Dep.isClobber())
1029  return Dep;
1030 
1031  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
1032  // update MemDep when we remove instructions.
1033  Instruction *Inst = Dep.getInst();
1034  assert(Inst && "Didn't depend on anything?");
1035  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1036  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
1037  return Dep;
1038 }
1039 
1040 /// Sort the NonLocalDepInfo cache, given a certain number of elements in the
1041 /// array that are already properly ordered.
1042 ///
1043 /// This is optimized for the case when only a few entries are added.
1044 static void
1045 SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
1046  unsigned NumSortedEntries) {
1047  switch (Cache.size() - NumSortedEntries) {
1048  case 0:
1049  // done, no new entries.
1050  break;
1051  case 2: {
1052  // Two new entries, insert the last one into place.
1053  NonLocalDepEntry Val = Cache.back();
1054  Cache.pop_back();
1055  MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1056  std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
1057  Cache.insert(Entry, Val);
1058  LLVM_FALLTHROUGH;
1059  }
1060  case 1:
1061  // One new entry, Just insert the new value at the appropriate position.
1062  if (Cache.size() != 1) {
1063  NonLocalDepEntry Val = Cache.back();
1064  Cache.pop_back();
1065  MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1066  std::upper_bound(Cache.begin(), Cache.end(), Val);
1067  Cache.insert(Entry, Val);
1068  }
1069  break;
1070  default:
1071  // Added many values, do a full scale sort.
1072  llvm::sort(Cache.begin(), Cache.end());
1073  break;
1074  }
1075 }
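The one- and two-entry cases above are the usual "insert the unsorted tail into an already sorted prefix" idiom; a stripped-down sketch of the same idea on a plain std::vector (illustrative only, not from this file):

#include <algorithm>
#include <vector>

// Insert the single unsorted element at the back of V into its sorted prefix.
static void insertTailIntoSortedPrefix(std::vector<int> &V) {
  if (V.size() < 2)
    return; // Zero or one element is trivially sorted.
  int Val = V.back();
  V.pop_back();
  V.insert(std::upper_bound(V.begin(), V.end(), Val), Val);
}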
1076 
1077 /// Perform a dependency query based on pointer/pointeesize starting at the end
1078 /// of StartBB.
1079 ///
1080 /// Add any clobber/def results to the results vector and keep track of which
1081 /// blocks are visited in 'Visited'.
1082 ///
1083 /// This has special behavior for the first block queries (when SkipFirstBlock
1084 /// is true). In this special case, it ignores the contents of the specified
1085 /// block and starts returning dependence info for its predecessors.
1086 ///
1087 /// This function returns true on success, or false to indicate that it could
1088 /// not compute dependence information for some reason. This should be treated
1089 /// as a clobber dependence on the first instruction in the predecessor block.
1090 bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1091  Instruction *QueryInst, const PHITransAddr &Pointer,
1092  const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
1093  SmallVectorImpl<NonLocalDepResult> &Result,
1094  DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock) {
1095  // Look up the cached info for Pointer.
1096  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1097 
1098  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
1099  // CacheKey, this value will be inserted as the associated value. Otherwise,
1100  // it'll be ignored, and we'll have to check to see if the cached size and
1101  // aa tags are consistent with the current query.
1102  NonLocalPointerInfo InitialNLPI;
1103  InitialNLPI.Size = Loc.Size;
1104  InitialNLPI.AATags = Loc.AATags;
1105 
1106  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
1107  // already have one.
1108  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1109  NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
1110  NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1111 
1112  // If we already have a cache entry for this CacheKey, we may need to do some
1113  // work to reconcile the cache entry and the current query.
1114  if (!Pair.second) {
1115  if (CacheInfo->Size < Loc.Size) {
1116  // The query's Size is greater than the cached one. Throw out the
1117  // cached data and proceed with the query at the greater size.
1118  CacheInfo->Pair = BBSkipFirstBlockPair();
1119  CacheInfo->Size = Loc.Size;
1120  for (auto &Entry : CacheInfo->NonLocalDeps)
1121  if (Instruction *Inst = Entry.getResult().getInst())
1122  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1123  CacheInfo->NonLocalDeps.clear();
1124  } else if (CacheInfo->Size > Loc.Size) {
1125  // This query's Size is less than the cached one. Conservatively restart
1126  // the query using the greater size.
1127  return getNonLocalPointerDepFromBB(
1128  QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
1129  StartBB, Result, Visited, SkipFirstBlock);
1130  }
1131 
1132  // If the query's AATags are inconsistent with the cached one,
1133  // conservatively throw out the cached data and restart the query with
1134  // no tag if needed.
1135  if (CacheInfo->AATags != Loc.AATags) {
1136  if (CacheInfo->AATags) {
1137  CacheInfo->Pair = BBSkipFirstBlockPair();
1138  CacheInfo->AATags = AAMDNodes();
1139  for (auto &Entry : CacheInfo->NonLocalDeps)
1140  if (Instruction *Inst = Entry.getResult().getInst())
1141  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1142  CacheInfo->NonLocalDeps.clear();
1143  }
1144  if (Loc.AATags)
1145  return getNonLocalPointerDepFromBB(
1146  QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
1147  Visited, SkipFirstBlock);
1148  }
1149  }
1150 
1151  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1152 
1153  // If we have valid cached information for exactly the block we are
1154  // investigating, just return it with no recomputation.
1155  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1156  // We have a fully cached result for this query, so we can just return the
1157  // cached results and populate the visited set. However, we have to verify
1158  // that we don't already have conflicting results for these blocks. Check
1159  // to ensure that if a block in the results set is in the visited set that
1160  // it was for the same pointer query.
1161  if (!Visited.empty()) {
1162  for (auto &Entry : *Cache) {
1163  DenseMap<BasicBlock *, Value *>::iterator VI =
1164  Visited.find(Entry.getBB());
1165  if (VI == Visited.end() || VI->second == Pointer.getAddr())
1166  continue;
1167 
1168  // We have a pointer mismatch in a block. Just return false, saying
1169  // that something was clobbered in this result. We could also do a
1170  // non-fully cached query, but there is little point in doing this.
1171  return false;
1172  }
1173  }
1174 
1175  Value *Addr = Pointer.getAddr();
1176  for (auto &Entry : *Cache) {
1177  Visited.insert(std::make_pair(Entry.getBB(), Addr));
1178  if (Entry.getResult().isNonLocal()) {
1179  continue;
1180  }
1181 
1182  if (DT.isReachableFromEntry(Entry.getBB())) {
1183  Result.push_back(
1184  NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
1185  }
1186  }
1187  ++NumCacheCompleteNonLocalPtr;
1188  return true;
1189  }
1190 
1191  // Otherwise, either this is a new block, a block with an invalid cache
1192  // pointer or one that we're about to invalidate by putting more info into it
1193  // than its valid cache info. If empty, the result will be valid cache info,
1194  // otherwise it isn't.
1195  if (Cache->empty())
1196  CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1197  else
1198  CacheInfo->Pair = BBSkipFirstBlockPair();
1199 
1200  SmallVector<BasicBlock *, 32> Worklist;
1201  Worklist.push_back(StartBB);
1202 
1203  // PredList used inside loop.
1204  SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;
1205 
1206  // Keep track of the entries that we know are sorted. Previously cached
1207  // entries will all be sorted. The entries we add we only sort on demand (we
1208  // don't insert every element into its sorted position). We know that we
1209  // won't get any reuse from currently inserted values, because we don't
1210  // revisit blocks after we insert info for them.
1211  unsigned NumSortedEntries = Cache->size();
1212  unsigned WorklistEntries = BlockNumberLimit;
1213  bool GotWorklistLimit = false;
1214  LLVM_DEBUG(AssertSorted(*Cache));
1215 
1216  while (!Worklist.empty()) {
1217  BasicBlock *BB = Worklist.pop_back_val();
1218 
1219  // If we do process a large number of blocks, it becomes very expensive and
1220  // is likely not worth worrying about.
1221  if (Result.size() > NumResultsLimit) {
1222  Worklist.clear();
1223  // Sort it now (if needed) so that recursive invocations of
1224  // getNonLocalPointerDepFromBB and other routines that could reuse the
1225  // cache value will only see properly sorted cache arrays.
1226  if (Cache && NumSortedEntries != Cache->size()) {
1227  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1228  }
1229  // Since we bail out, the "Cache" set won't contain all of the
1230  // results for the query. This is ok (we can still use it to accelerate
1231  // specific block queries) but we can't do the fastpath "return all
1232  // results from the set". Clear out the indicator for this.
1233  CacheInfo->Pair = BBSkipFirstBlockPair();
1234  return false;
1235  }
1236 
1237  // Skip the first block if we have it.
1238  if (!SkipFirstBlock) {
1239  // Analyze the dependency of *Pointer in FromBB. See if we already have
1240  // been here.
1241  assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
1242 
1243  // Get the dependency info for Pointer in BB. If we have cached
1244  // information, we will use it, otherwise we compute it.
1245  LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
1246  MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, Loc, isLoad, BB,
1247  Cache, NumSortedEntries);
1248 
1249  // If we got a Def or Clobber, add this to the list of results.
1250  if (!Dep.isNonLocal()) {
1251  if (DT.isReachableFromEntry(BB)) {
1252  Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
1253  continue;
1254  }
1255  }
1256  }
1257 
1258  // If 'Pointer' is an instruction defined in this block, then we need to do
1259  // phi translation to change it into a value live in the predecessor block.
1260  // If not, we just add the predecessors to the worklist and scan them with
1261  // the same Pointer.
1262  if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
1263  SkipFirstBlock = false;
1264  SmallVector<BasicBlock *, 16> NewBlocks;
1265  for (BasicBlock *Pred : PredCache.get(BB)) {
1266  // Verify that we haven't looked at this block yet.
1267  std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1268  Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
1269  if (InsertRes.second) {
1270  // First time we've looked at Pred.
1271  NewBlocks.push_back(Pred);
1272  continue;
1273  }
1274 
1275  // If we have seen this block before, but it was with a different
1276  // pointer then we have a phi translation failure and we have to treat
1277  // this as a clobber.
1278  if (InsertRes.first->second != Pointer.getAddr()) {
1279  // Make sure to clean up the Visited map before continuing on to
1280  // PredTranslationFailure.
1281  for (unsigned i = 0; i < NewBlocks.size(); i++)
1282  Visited.erase(NewBlocks[i]);
1283  goto PredTranslationFailure;
1284  }
1285  }
1286  if (NewBlocks.size() > WorklistEntries) {
1287  // Make sure to clean up the Visited map before continuing on to
1288  // PredTranslationFailure.
1289  for (unsigned i = 0; i < NewBlocks.size(); i++)
1290  Visited.erase(NewBlocks[i]);
1291  GotWorklistLimit = true;
1292  goto PredTranslationFailure;
1293  }
1294  WorklistEntries -= NewBlocks.size();
1295  Worklist.append(NewBlocks.begin(), NewBlocks.end());
1296  continue;
1297  }
1298 
1299  // We do need to do phi translation, if we know ahead of time we can't phi
1300  // translate this value, don't even try.
1301  if (!Pointer.IsPotentiallyPHITranslatable())
1302  goto PredTranslationFailure;
1303 
1304  // We may have added values to the cache list before this PHI translation.
1305  // If so, we haven't done anything to ensure that the cache remains sorted.
1306  // Sort it now (if needed) so that recursive invocations of
1307  // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1308  // value will only see properly sorted cache arrays.
1309  if (Cache && NumSortedEntries != Cache->size()) {
1310  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1311  NumSortedEntries = Cache->size();
1312  }
1313  Cache = nullptr;
1314 
1315  PredList.clear();
1316  for (BasicBlock *Pred : PredCache.get(BB)) {
1317  PredList.push_back(std::make_pair(Pred, Pointer));
1318 
1319  // Get the PHI translated pointer in this predecessor. This can fail if
1320  // not translatable, in which case the getAddr() returns null.
1321  PHITransAddr &PredPointer = PredList.back().second;
1322  PredPointer.PHITranslateValue(BB, Pred, &DT, /*MustDominate=*/false);
1323  Value *PredPtrVal = PredPointer.getAddr();
1324 
1325  // Check to see if we have already visited this pred block with another
1326  // pointer. If so, we can't do this lookup. This failure can occur
1327  // with PHI translation when a critical edge exists and the PHI node in
1328  // the successor translates to a pointer value different than the
1329  // pointer the block was first analyzed with.
1330  std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1331  Visited.insert(std::make_pair(Pred, PredPtrVal));
1332 
1333  if (!InsertRes.second) {
1334  // We found the pred; take it off the list of preds to visit.
1335  PredList.pop_back();
1336 
1337  // If the predecessor was visited with PredPtr, then we already did
1338  // the analysis and can ignore it.
1339  if (InsertRes.first->second == PredPtrVal)
1340  continue;
1341 
1342  // Otherwise, the block was previously analyzed with a different
1343  // pointer. We can't represent the result of this case, so we just
1344  // treat this as a phi translation failure.
1345 
1346  // Make sure to clean up the Visited map before continuing on to
1347  // PredTranslationFailure.
1348  for (unsigned i = 0, n = PredList.size(); i < n; ++i)
1349  Visited.erase(PredList[i].first);
1350 
1351  goto PredTranslationFailure;
1352  }
1353  }
1354 
1355  // Actually process results here; this needs to be a separate loop to avoid
1356  // calling getNonLocalPointerDepFromBB for blocks we don't want to return
1357  // any results for. (getNonLocalPointerDepFromBB will modify our
1358  // datastructures in ways the code after the PredTranslationFailure label
1359  // doesn't expect.)
1360  for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
1361  BasicBlock *Pred = PredList[i].first;
1362  PHITransAddr &PredPointer = PredList[i].second;
1363  Value *PredPtrVal = PredPointer.getAddr();
1364 
1365  bool CanTranslate = true;
1366  // If PHI translation was unable to find an available pointer in this
1367  // predecessor, then we have to assume that the pointer is clobbered in
1368  // that predecessor. We can still do PRE of the load, which would insert
1369  // a computation of the pointer in this predecessor.
1370  if (!PredPtrVal)
1371  CanTranslate = false;
1372 
1373  // FIXME: it is entirely possible that PHI translating will end up with
1374  // the same value. Consider PHI translating something like:
1375  // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
1376  // to recurse here, pedantically speaking.
1377 
1378  // If getNonLocalPointerDepFromBB fails here, that means the cached
1379  // result conflicted with the Visited list; we have to conservatively
1380  // assume it is unknown, but this also does not block PRE of the load.
1381  if (!CanTranslate ||
1382  !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1383  Loc.getWithNewPtr(PredPtrVal), isLoad,
1384  Pred, Result, Visited)) {
1385  // Add the entry to the Result list.
1386  NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
1387  Result.push_back(Entry);
1388 
1389  // Since we had a phi translation failure, the cache for CacheKey won't
1390  // include all of the entries that we need to immediately satisfy future
1391  // queries. Mark this in NonLocalPointerDeps by setting the
1392  // BBSkipFirstBlockPair pointer to null. This requires reuse of the
1393  // cached value to do more work but not miss the phi trans failure.
1394  NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1395  NLPI.Pair = BBSkipFirstBlockPair();
1396  continue;
1397  }
1398  }
1399 
1400  // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1401  CacheInfo = &NonLocalPointerDeps[CacheKey];
1402  Cache = &CacheInfo->NonLocalDeps;
1403  NumSortedEntries = Cache->size();
1404 
1405  // Since we did phi translation, the "Cache" set won't contain all of the
1406  // results for the query. This is ok (we can still use it to accelerate
1407  // specific block queries) but we can't do the fastpath "return all
1408  // results from the set" Clear out the indicator for this.
1409  CacheInfo->Pair = BBSkipFirstBlockPair();
1410  SkipFirstBlock = false;
1411  continue;
1412 
1413  PredTranslationFailure:
1414  // The following code is "failure"; we can't produce a sane translation
1415  // for the given block. It assumes that we haven't modified any of
1416  // our datastructures while processing the current block.
1417 
1418  if (!Cache) {
1419  // Refresh the CacheInfo/Cache pointer if it got invalidated.
1420  CacheInfo = &NonLocalPointerDeps[CacheKey];
1421  Cache = &CacheInfo->NonLocalDeps;
1422  NumSortedEntries = Cache->size();
1423  }
1424 
1425  // Since we failed phi translation, the "Cache" set won't contain all of the
1426  // results for the query. This is ok (we can still use it to accelerate
1427  // specific block queries) but we can't do the fastpath "return all
1428  // results from the set". Clear out the indicator for this.
1429  CacheInfo->Pair = BBSkipFirstBlockPair();
1430 
1431  // If *nothing* works, mark the pointer as unknown.
1432  //
1433  // If this is the magic first block, return this as a clobber of the whole
1434  // incoming value. Since we can't phi translate to one of the predecessors,
1435  // we have to bail out.
1436  if (SkipFirstBlock)
1437  return false;
1438 
1439  bool foundBlock = false;
1440  for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
1441  if (I.getBB() != BB)
1442  continue;
1443 
1444  assert((GotWorklistLimit || I.getResult().isNonLocal() ||
1445  !DT.isReachableFromEntry(BB)) &&
1446  "Should only be here with transparent block");
1447  foundBlock = true;
1448  I.setResult(MemDepResult::getUnknown());
1449  Result.push_back(
1450  NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
1451  break;
1452  }
1453  (void)foundBlock; (void)GotWorklistLimit;
1454  assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
1455  }
1456 
1457  // Okay, we're done now. If we added new values to the cache, re-sort it.
1458  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1459  LLVM_DEBUG(AssertSorted(*Cache));
1460  return true;
1461 }
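// Usage sketch (illustrative only; 'MD' and 'LI' are assumed to be a
// MemoryDependenceResults instance and a candidate LoadInst): roughly how a
// client such as GVN-style load PRE consumes the per-block results produced
// above, including the Unknown entries emitted when phi translation fails.
//
//   SmallVector<NonLocalDepResult, 64> Deps;
//   MD.getNonLocalPointerDependency(LI, Deps);
//   for (const NonLocalDepResult &Dep : Deps) {
//     BasicBlock *DepBB = Dep.getBB();
//     const MemDepResult &Res = Dep.getResult();
//     if (Res.isDef() || Res.isClobber()) {
//       // A concrete dependency was found in DepBB.
//     } else if (Res.isUnknown()) {
//       // Phi translation failed on this path; the load may still be
//       // PRE-able by materializing its address in DepBB.
//     }
//   }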
1462 
1463 /// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
1464 void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies(
1465  ValueIsLoadPair P) {
1466 
1467  // Most of the time this cache is empty.
1468  if (!NonLocalDefsCache.empty()) {
1469  auto it = NonLocalDefsCache.find(P.getPointer());
1470  if (it != NonLocalDefsCache.end()) {
1471  RemoveFromReverseMap(ReverseNonLocalDefsCache,
1472  it->second.getResult().getInst(), P.getPointer());
1473  NonLocalDefsCache.erase(it);
1474  }
1475 
1476  if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
1477  auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
1478  if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
1479  for (const auto &entry : toRemoveIt->second)
1480  NonLocalDefsCache.erase(entry);
1481  ReverseNonLocalDefsCache.erase(toRemoveIt);
1482  }
1483  }
1484  }
1485 
1486  CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
1487  if (It == NonLocalPointerDeps.end())
1488  return;
1489 
1490  // Remove all of the entries in the BB->val map. This involves removing
1491  // instructions from the reverse map.
1492  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
1493 
1494  for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
1495  Instruction *Target = PInfo[i].getResult().getInst();
1496  if (!Target)
1497  continue; // Ignore non-local dep results.
1498  assert(Target->getParent() == PInfo[i].getBB());
1499 
1500  // Eliminating the dirty entry from 'Cache', so update the reverse info.
1501  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1502  }
1503 
1504  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1505  NonLocalPointerDeps.erase(It);
1506 }
1507 
1508 void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
1509  // If Ptr isn't really a pointer, just ignore it.
1510  if (!Ptr->getType()->isPointerTy())
1511  return;
1512  // Flush store info for the pointer.
1513  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1514  // Flush load info for the pointer.
1515  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1516 }
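// Usage sketch (hypothetical caller): a transformation that replaces one
// pointer value with another should drop the possibly-too-conservative cached
// queries for the replacement value, e.g.
//
//   OldInst->replaceAllUsesWith(NewV);
//   if (NewV->getType()->isPointerTy())
//     MD.invalidateCachedPointerInfo(NewV);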
1517 
1518 void MemoryDependenceResults::invalidateCachedPredecessors() {
1519  PredCache.clear();
1520 }
1521 
1522 void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
1523  // Walk through the Non-local dependencies, removing this one as the value
1524  // for any cached queries.
1525  NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
1526  if (NLDI != NonLocalDeps.end()) {
1527  NonLocalDepInfo &BlockMap = NLDI->second.first;
1528  for (auto &Entry : BlockMap)
1529  if (Instruction *Inst = Entry.getResult().getInst())
1530  RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1531  NonLocalDeps.erase(NLDI);
1532  }
1533 
1534  // If we have a cached local dependence query for this instruction, remove it.
1535  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1536  if (LocalDepEntry != LocalDeps.end()) {
1537  // Remove us from DepInst's reverse set now that the local dep info is gone.
1538  if (Instruction *Inst = LocalDepEntry->second.getInst())
1539  RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1540 
1541  // Remove this local dependency info.
1542  LocalDeps.erase(LocalDepEntry);
1543  }
1544 
1545  // If we have any cached pointer dependencies on this instruction, remove
1546  // them. If the instruction has non-pointer type, then it can't be a pointer
1547  // base.
1548 
1549  // Remove it from both the load info and the store info. The instruction
1550  // can't be in either of these maps if it is non-pointer.
1551  if (RemInst->getType()->isPointerTy()) {
1552  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1553  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1554  }
1555 
1556  // Loop over all of the things that depend on the instruction we're removing.
1557  SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;
1558 
1559  // If we find RemInst as a clobber or Def in any of the maps for other values,
1560  // we need to replace its entry with a dirty version of the instruction after
1561  // it. If RemInst is a terminator, we use a null dirty value.
1562  //
1563  // Using a dirty version of the instruction after RemInst saves having to scan
1564  // the entire block to get to this point.
1565  MemDepResult NewDirtyVal;
1566  if (!RemInst->isTerminator())
1567  NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
1568 
1569  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1570  if (ReverseDepIt != ReverseLocalDeps.end()) {
1571  // RemInst can't be the terminator if it has local stuff depending on it.
1572  assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) &&
1573  "Nothing can locally depend on a terminator");
1574 
1575  for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
1576  assert(InstDependingOnRemInst != RemInst &&
1577  "Already removed our local dep info");
1578 
1579  LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1580 
1581  // Make sure to remember that new things depend on NewDirtyVal's instruction.
1582  assert(NewDirtyVal.getInst() &&
1583  "There is no way something else can have "
1584  "a local dep on this if it is a terminator!");
1585  ReverseDepsToAdd.push_back(
1586  std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
1587  }
1588 
1589  ReverseLocalDeps.erase(ReverseDepIt);
1590 
1591  // Add new reverse deps after scanning the set, to avoid invalidating the
1592  // 'ReverseDeps' reference.
1593  while (!ReverseDepsToAdd.empty()) {
1594  ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
1595  ReverseDepsToAdd.back().second);
1596  ReverseDepsToAdd.pop_back();
1597  }
1598  }
1599 
1600  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1601  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1602  for (Instruction *I : ReverseDepIt->second) {
1603  assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1604 
1605  PerInstNLInfo &INLD = NonLocalDeps[I];
1606  // The information is now dirty!
1607  INLD.second = true;
1608 
1609  for (auto &Entry : INLD.first) {
1610  if (Entry.getResult().getInst() != RemInst)
1611  continue;
1612 
1613  // Convert to a dirty entry for the subsequent instruction.
1614  Entry.setResult(NewDirtyVal);
1615 
1616  if (Instruction *NextI = NewDirtyVal.getInst())
1617  ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
1618  }
1619  }
1620 
1621  ReverseNonLocalDeps.erase(ReverseDepIt);
1622 
1623  // Add new reverse deps after scanning the set, to avoid invalidating it while iterating.
1624  while (!ReverseDepsToAdd.empty()) {
1625  ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
1626  ReverseDepsToAdd.back().second);
1627  ReverseDepsToAdd.pop_back();
1628  }
1629  }
1630 
1631  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1632  // value in the NonLocalPointerDeps info.
1633  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1634  ReverseNonLocalPtrDeps.find(RemInst);
1635  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1636  SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
1637  ReversePtrDepsToAdd;
1638 
1639  for (ValueIsLoadPair P : ReversePtrDepIt->second) {
1640  assert(P.getPointer() != RemInst &&
1641  "Already removed NonLocalPointerDeps info for RemInst");
1642 
1643  NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
1644 
1645  // The cache is not valid for any specific block anymore.
1646  NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
1647 
1648  // Update any entries for RemInst to use the instruction after it.
1649  for (auto &Entry : NLPDI) {
1650  if (Entry.getResult().getInst() != RemInst)
1651  continue;
1652 
1653  // Convert to a dirty entry for the subsequent instruction.
1654  Entry.setResult(NewDirtyVal);
1655 
1656  if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1657  ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1658  }
1659 
1660  // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
1661  // subsequent value may invalidate the sortedness.
1662  llvm::sort(NLPDI.begin(), NLPDI.end());
1663  }
1664 
1665  ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1666 
1667  while (!ReversePtrDepsToAdd.empty()) {
1668  ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
1669  ReversePtrDepsToAdd.back().second);
1670  ReversePtrDepsToAdd.pop_back();
1671  }
1672  }
1673 
1674  assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
1675  LLVM_DEBUG(verifyRemoved(RemInst));
1676 }
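// Usage sketch (hypothetical helper): clients are expected to notify the
// analysis *before* erasing an instruction, so the code above can convert any
// dependents into dirty entries on the following instruction.
//
//   static void eraseWithMemDep(MemoryDependenceResults &MD, Instruction *I) {
//     MD.removeInstruction(I); // re-point dependents at a dirty entry
//     I->eraseFromParent();
//   }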
1677 
1678 /// Verify that the specified instruction does not occur in our internal data
1679 /// structures.
1680 ///
1681 /// This function verifies by asserting in debug builds.
1682 void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
1683 #ifndef NDEBUG
1684  for (const auto &DepKV : LocalDeps) {
1685  assert(DepKV.first != D && "Inst occurs in data structures");
1686  assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1687  }
1688 
1689  for (const auto &DepKV : NonLocalPointerDeps) {
1690  assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1691  for (const auto &Entry : DepKV.second.NonLocalDeps)
1692  assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1693  }
1694 
1695  for (const auto &DepKV : NonLocalDeps) {
1696  assert(DepKV.first != D && "Inst occurs in data structures");
1697  const PerInstNLInfo &INLD = DepKV.second;
1698  for (const auto &Entry : INLD.first)
1699  assert(Entry.getResult().getInst() != D &&
1700  "Inst occurs in data structures");
1701  }
1702 
1703  for (const auto &DepKV : ReverseLocalDeps) {
1704  assert(DepKV.first != D && "Inst occurs in data structures");
1705  for (Instruction *Inst : DepKV.second)
1706  assert(Inst != D && "Inst occurs in data structures");
1707  }
1708 
1709  for (const auto &DepKV : ReverseNonLocalDeps) {
1710  assert(DepKV.first != D && "Inst occurs in data structures");
1711  for (Instruction *Inst : DepKV.second)
1712  assert(Inst != D && "Inst occurs in data structures");
1713  }
1714 
1715  for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1716  assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1717 
1718  for (ValueIsLoadPair P : DepKV.second)
1719  assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1720  "Inst occurs in ReverseNonLocalPtrDeps map");
1721  }
1722 #endif
1723 }
1724 
1725 AnalysisKey MemoryDependenceAnalysis::Key;
1726 
1727 MemoryDependenceResults MemoryDependenceAnalysis::run(Function &F,
1728                                                        FunctionAnalysisManager &AM) {
1729  auto &AA = AM.getResult<AAManager>(F);
1730  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1731  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1732  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1733  return MemoryDependenceResults(AA, AC, TLI, DT);
1734 }
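// Usage sketch for the new pass manager ('MyPass' is hypothetical): other
// function passes obtain the cached results through the analysis manager.
//
//   PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &AM) {
//     auto &MD = AM.getResult<MemoryDependenceAnalysis>(F);
//     for (Instruction &I : F.getEntryBlock())
//       if (auto *LI = dyn_cast<LoadInst>(&I)) {
//         MemDepResult Dep = MD.getDependency(LI);
//         (void)Dep; // e.g. inspect Dep.getInst() / Dep.isClobber()
//       }
//     return PreservedAnalyses::all();
//   }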
1735 
1736 char MemoryDependenceWrapperPass::ID = 0;
1737 
1738 INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
1739  "Memory Dependence Analysis", false, true)
1740 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1741 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1742 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1743 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1744 INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
1745  "Memory Dependence Analysis", false, true)
1746 
1747 MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
1748  initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
1749 }
1750 
1751 MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
1752 
1753 void MemoryDependenceWrapperPass::releaseMemory() {
1754  MemDep.reset();
1755 }
1756 
1757 void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1758  AU.setPreservesAll();
1759  AU.addRequired<AssumptionCacheTracker>();
1760  AU.addRequired<DominatorTreeWrapperPass>();
1761  AU.addRequiredTransitive<AAResultsWrapperPass>();
1762  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
1763 }
1764 
1765 bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
1766  FunctionAnalysisManager::Invalidator &Inv) {
1767  // Check whether our analysis is preserved.
1768  auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1769  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1770  // If not, give up now.
1771  return true;
1772 
1773  // Check whether the analyses we depend on became invalid for any reason.
1774  if (Inv.invalidate<AAManager>(F, PA) ||
1775  Inv.invalidate<AssumptionAnalysis>(F, PA) ||
1776  Inv.invalidate<DominatorTreeAnalysis>(F, PA))
1777  return true;
1778 
1779  // Otherwise this analysis result remains valid.
1780  return false;
1781 }
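// Usage sketch: a transformation that keeps this result up to date (via
// removeInstruction / invalidateCachedPointerInfo) can report it preserved so
// the check above keeps the cached analysis alive; the analyses it depends on
// must be preserved as well, or the dependent-analysis checks above will still
// invalidate it.
//
//   PreservedAnalyses PA;
//   PA.preserve<MemoryDependenceAnalysis>();
//   PA.preserve<DominatorTreeAnalysis>();
//   return PA;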
1782 
1783 unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
1784  return BlockScanLimit;
1785 }
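// Usage sketch (illustrative only; 'MD', 'Loc', 'ScanIt', 'BB' and
// 'QueryInst' are assumed to describe the query): callers that issue many
// scans can share this budget by threading it through the Limit parameter.
//
//   unsigned Limit = MD.getDefaultBlockScanLimit();
//   MemDepResult Res = MD.getPointerDependencyFrom(Loc, /*isLoad=*/true,
//                                                  ScanIt, BB, QueryInst,
//                                                  &Limit);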
1786 
1787 bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
1788  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1789  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1790  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1791  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1792  MemDep.emplace(AA, AC, TLI, DT);
1793  return false;
1794 }
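// Usage sketch for the legacy pass manager ('MyLegacyPass' is hypothetical):
// declare the dependency and fetch the results object from the wrapper pass.
//
//   void MyLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<MemoryDependenceWrapperPass>();
//   }
//   bool MyLegacyPass::runOnFunction(Function &F) {
//     MemoryDependenceResults &MD =
//         getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
//     // ... query MD, e.g. MD.getDependency(...) ...
//     return false;
//   }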