LLVM  8.0.0svn
MemoryDependenceAnalysis.cpp
1 //===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements an analysis that determines, for a given memory
11 // operation, what preceding memory operations it depends on. It builds on
12 // alias analysis information, and tries to provide a lazy, caching interface to
13 // a common kind of alias information query.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/AliasAnalysis.h"
24 #include "llvm/Analysis/AssumptionCache.h"
25 #include "llvm/Analysis/MemoryBuiltins.h"
26 #include "llvm/Analysis/MemoryLocation.h"
27 #include "llvm/Analysis/OrderedBasicBlock.h"
28 #include "llvm/Analysis/PHITransAddr.h"
29 #include "llvm/Analysis/PhiValues.h"
30 #include "llvm/Analysis/TargetLibraryInfo.h"
31 #include "llvm/Analysis/ValueTracking.h"
32 #include "llvm/IR/Attributes.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/CallSite.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Dominators.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/InstrTypes.h"
41 #include "llvm/IR/Instruction.h"
42 #include "llvm/IR/Instructions.h"
43 #include "llvm/IR/IntrinsicInst.h"
44 #include "llvm/IR/LLVMContext.h"
45 #include "llvm/IR/Metadata.h"
46 #include "llvm/IR/Module.h"
47 #include "llvm/IR/PredIteratorCache.h"
48 #include "llvm/IR/Type.h"
49 #include "llvm/IR/Use.h"
50 #include "llvm/IR/User.h"
51 #include "llvm/IR/Value.h"
52 #include "llvm/Pass.h"
53 #include "llvm/Support/AtomicOrdering.h"
54 #include "llvm/Support/Casting.h"
55 #include "llvm/Support/CommandLine.h"
56 #include "llvm/Support/Compiler.h"
57 #include "llvm/Support/Debug.h"
58 #include "llvm/Support/MathExtras.h"
59 #include <algorithm>
60 #include <cassert>
61 #include <cstdint>
62 #include <iterator>
63 #include <utility>
64 
65 using namespace llvm;
66 
67 #define DEBUG_TYPE "memdep"
68 
69 STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
70 STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
71 STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
72 
73 STATISTIC(NumCacheNonLocalPtr,
74  "Number of fully cached non-local ptr responses");
75 STATISTIC(NumCacheDirtyNonLocalPtr,
76  "Number of cached, but dirty, non-local ptr responses");
77 STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
78 STATISTIC(NumCacheCompleteNonLocalPtr,
79  "Number of block queries that were completely cached");
80 
81 // Limit for the number of instructions to scan in a block.
82 
84  "memdep-block-scan-limit", cl::Hidden, cl::init(100),
85  cl::desc("The number of instructions to scan in a block in memory "
86  "dependency analysis (default = 100)"));
87 
88 static cl::opt<unsigned>
89  BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(1000),
90  cl::desc("The number of blocks to scan during memory "
91  "dependency analysis (default = 1000)"));
92 
93 // Limit on the number of memdep results to process.
94 static const unsigned int NumResultsLimit = 100;
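// Note: together, "memdep-block-scan-limit", "memdep-block-number-limit" and
// NumResultsLimit bound the work a single non-local query may perform; when
// any of them is exhausted the analysis conservatively reports an unknown
// (or clobber) result rather than keep scanning.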
95 
96 /// This is a helper function that removes Val from 'Inst's set in ReverseMap.
97 ///
98 /// If the set becomes empty, remove Inst's entry.
99 template <typename KeyTy>
100 static void
101 RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
102  Instruction *Inst, KeyTy Val) {
103  typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
104  ReverseMap.find(Inst);
105  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
106  bool Found = InstIt->second.erase(Val);
107  assert(Found && "Invalid reverse map!");
108  (void)Found;
109  if (InstIt->second.empty())
110  ReverseMap.erase(InstIt);
111 }
112 
113 /// If the given instruction references a specific memory location, fill in Loc
114 /// with the details, otherwise set Loc.Ptr to null.
115 ///
116 /// Returns a ModRefInfo value describing the general behavior of the
117 /// instruction.
118 static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
119  const TargetLibraryInfo &TLI) {
120  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
121  if (LI->isUnordered()) {
122  Loc = MemoryLocation::get(LI);
123  return ModRefInfo::Ref;
124  }
125  if (LI->getOrdering() == AtomicOrdering::Monotonic) {
126  Loc = MemoryLocation::get(LI);
127  return ModRefInfo::ModRef;
128  }
129  Loc = MemoryLocation();
130  return ModRefInfo::ModRef;
131  }
132 
133  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
134  if (SI->isUnordered()) {
135  Loc = MemoryLocation::get(SI);
136  return ModRefInfo::Mod;
137  }
138  if (SI->getOrdering() == AtomicOrdering::Monotonic) {
139  Loc = MemoryLocation::get(SI);
140  return ModRefInfo::ModRef;
141  }
142  Loc = MemoryLocation();
143  return ModRefInfo::ModRef;
144  }
145 
146  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
147  Loc = MemoryLocation::get(V);
148  return ModRefInfo::ModRef;
149  }
150 
151  if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
152  // calls to free() deallocate the entire structure
153  Loc = MemoryLocation(CI->getArgOperand(0));
154  return ModRefInfo::Mod;
155  }
156 
157  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
158  switch (II->getIntrinsicID()) {
159  case Intrinsic::lifetime_start:
160  case Intrinsic::lifetime_end:
161  case Intrinsic::invariant_start:
162  Loc = MemoryLocation::getForArgument(II, 1, TLI);
163  // These intrinsics don't really modify the memory, but returning Mod
164  // will allow them to be handled conservatively.
165  return ModRefInfo::Mod;
166  case Intrinsic::invariant_end:
167  Loc = MemoryLocation::getForArgument(II, 2, TLI);
168  // These intrinsics don't really modify the memory, but returning Mod
169  // will allow them to be handled conservatively.
170  return ModRefInfo::Mod;
171  default:
172  break;
173  }
174  }
175 
176  // Otherwise, just do the coarse-grained thing that always works.
177  if (Inst->mayWriteToMemory())
178  return ModRefInfo::ModRef;
179  if (Inst->mayReadFromMemory())
180  return ModRefInfo::Ref;
181  return ModRefInfo::NoModRef;
182 }
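// Note: callers such as getDependency() below use the ModRefInfo returned by
// GetLocation to choose between a pointer-based scan (when Loc.Ptr is set)
// and a call-site scan for calls/invokes that have no specific location.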
183 
184 /// Private helper for finding the local dependencies of a call site.
185 MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
186  CallSite CS, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
187  BasicBlock *BB) {
188  unsigned Limit = BlockScanLimit;
189 
190  // Walk backwards through the block, looking for dependencies.
191  while (ScanIt != BB->begin()) {
192  Instruction *Inst = &*--ScanIt;
193  // Debug intrinsics don't cause dependences and should not affect Limit
194  if (isa<DbgInfoIntrinsic>(Inst))
195  continue;
196 
197  // Limit the amount of scanning we do so we don't end up with quadratic
198  // running time on extreme testcases.
199  --Limit;
200  if (!Limit)
201  return MemDepResult::getUnknown();
202 
203  // If this inst is a memory op, get the pointer it accessed
204  MemoryLocation Loc;
205  ModRefInfo MR = GetLocation(Inst, Loc, TLI);
206  if (Loc.Ptr) {
207  // A simple instruction.
208  if (isModOrRefSet(AA.getModRefInfo(CS, Loc)))
209  return MemDepResult::getClobber(Inst);
210  continue;
211  }
212 
213  if (auto InstCS = CallSite(Inst)) {
214  // If these two calls do not interfere, look past it.
215  if (isNoModRef(AA.getModRefInfo(CS, InstCS))) {
216  // If the two calls are the same, return InstCS as a Def, so that
217  // CS can be found redundant and eliminated.
218  if (isReadOnlyCall && !isModSet(MR) &&
219  CS.getInstruction()->isIdenticalToWhenDefined(Inst))
220  return MemDepResult::getDef(Inst);
221 
222  // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
223  // keep scanning.
224  continue;
225  } else
226  return MemDepResult::getClobber(Inst);
227  }
228 
229  // If we could not obtain a pointer for the instruction and the instruction
230  // touches memory then assume that this is a dependency.
231  if (isModOrRefSet(MR))
232  return MemDepResult::getClobber(Inst);
233  }
234 
235  // No dependence found. If this is the entry block of the function, it is
236  // unknown, otherwise it is non-local.
237  if (BB != &BB->getParent()->getEntryBlock())
238  return MemDepResult::getNonLocal();
239  return MemDepResult::getNonFuncLocal();
240 }
241 
242 unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
243  const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
244  const LoadInst *LI) {
245  // We can only extend simple integer loads.
246  if (!isa<IntegerType>(LI->getType()) || !LI->isSimple())
247  return 0;
248 
249  // Load widening is hostile to ThreadSanitizer: it may cause false positives
250  // or make the reports more cryptic (access sizes are wrong).
251  if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
252  return 0;
253 
254  const DataLayout &DL = LI->getModule()->getDataLayout();
255 
256  // Get the base of this load.
257  int64_t LIOffs = 0;
258  const Value *LIBase =
259  GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
260 
261  // If the two pointers are not based on the same pointer, we can't tell that
262  // they are related.
263  if (LIBase != MemLocBase)
264  return 0;
265 
266  // Okay, the two values are based on the same pointer, but returned as
267  // no-alias. This happens when we have things like two byte loads at "P+1"
268  // and "P+3". Check to see if increasing the size of the "LI" load up to its
269  // alignment (or the largest native integer type) will allow us to load all
270  // the bits required by MemLoc.
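 // Illustrative example (assuming i16/i32 are legal integers for the target):
 // for an i8 load of P that is 4-byte aligned and a 2-byte MemLoc at offset 2
 // from the same base, widening the load to an i32 covers MemLocEnd == 4, so
 // the loop below would return a new load size of 4 bytes.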
271 
272  // If MemLoc is before LI, then no widening of LI will help us out.
273  if (MemLocOffs < LIOffs)
274  return 0;
275 
276  // Get the alignment of the load in bytes. We assume that it is safe to load
277  // any legal integer up to this size without a problem. For example, if we're
278  // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
279  // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
280  // to i16.
281  unsigned LoadAlign = LI->getAlignment();
282 
283  int64_t MemLocEnd = MemLocOffs + MemLocSize;
284 
285  // If no amount of rounding up will let MemLoc fit into LI, then bail out.
286  if (LIOffs + LoadAlign < MemLocEnd)
287  return 0;
288 
289  // This is the size of the load to try. Start with the next larger power of
290  // two.
291  unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
292  NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
293 
294  while (true) {
295  // If this load size is bigger than our known alignment or would not fit
296  // into a native integer register, then we fail.
297  if (NewLoadByteSize > LoadAlign ||
298  !DL.fitsInLegalInteger(NewLoadByteSize * 8))
299  return 0;
300 
301  if (LIOffs + NewLoadByteSize > MemLocEnd &&
302  (LI->getParent()->getParent()->hasFnAttribute(
303  Attribute::SanitizeAddress) ||
304  LI->getParent()->getParent()->hasFnAttribute(
305  Attribute::SanitizeHWAddress)))
306  // We will be reading past the location accessed by the original program.
307  // While this is safe in a regular build, Address Safety analysis tools
308  // may start reporting false warnings. So, don't do widening.
309  return 0;
310 
311  // If a load of this width would include all of MemLoc, then we succeed.
312  if (LIOffs + NewLoadByteSize >= MemLocEnd)
313  return NewLoadByteSize;
314 
315  NewLoadByteSize <<= 1;
316  }
317 }
318 
319 static bool isVolatile(Instruction *Inst) {
320  if (auto *LI = dyn_cast<LoadInst>(Inst))
321  return LI->isVolatile();
322  if (auto *SI = dyn_cast<StoreInst>(Inst))
323  return SI->isVolatile();
324  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
325  return AI->isVolatile();
326  return false;
327 }
328 
329 MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
330  const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
331  BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
332  MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
333  if (QueryInst != nullptr) {
334  if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
335  InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
336 
337  if (InvariantGroupDependency.isDef())
338  return InvariantGroupDependency;
339  }
340  }
341  MemDepResult SimpleDep = getSimplePointerDependencyFrom(
342  MemLoc, isLoad, ScanIt, BB, QueryInst, Limit);
343  if (SimpleDep.isDef())
344  return SimpleDep;
345  // A non-local invariant group dependency indicates that there is a non-local
346  // Def (it only returns nonLocal if it finds a non-local def), which is better
347  // than a local clobber and everything else.
348  if (InvariantGroupDependency.isNonLocal())
349  return InvariantGroupDependency;
350 
351  assert(InvariantGroupDependency.isUnknown() &&
352  "InvariantGroupDependency should be only unknown at this point");
353  return SimpleDep;
354 }
355 
356 MemDepResult
357 MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
358  BasicBlock *BB) {
359 
360  if (!LI->getMetadata(LLVMContext::MD_invariant_group))
361  return MemDepResult::getUnknown();
362 
363  // Take the ptr operand after all casts and geps 0. This way we can search
364  // cast graph down only.
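 // For example, if the load operand came from "%q = bitcast i8* %p to i32*",
 // the stripped operand is %p, and the search below also visits loads and
 // stores that go through the equivalent bitcast / gep-0 forms of %p.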
365  Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
366 
367  // It is not safe to walk the use list of a global value, because function
368  // passes aren't allowed to look outside their functions.
369  // FIXME: this could be fixed by filtering instructions from outside
370  // of current function.
371  if (isa<GlobalValue>(LoadOperand))
372  return MemDepResult::getUnknown();
373 
374  // Queue to process all pointers that are equivalent to load operand.
375  SmallVector<const Value *, 8> LoadOperandsQueue;
376  LoadOperandsQueue.push_back(LoadOperand);
377 
378  Instruction *ClosestDependency = nullptr;
379  // The order of instructions in the uses list is unpredictable. In order to
380  // always get the same result, we will look for the closest dominating use.
381  auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
382  assert(Other && "Must call it with not null instruction");
383  if (Best == nullptr || DT.dominates(Best, Other))
384  return Other;
385  return Best;
386  };
387 
388  // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
389  // we will see all the instructions. This should be fixed in MSSA.
390  while (!LoadOperandsQueue.empty()) {
391  const Value *Ptr = LoadOperandsQueue.pop_back_val();
392  assert(Ptr && !isa<GlobalValue>(Ptr) &&
393  "Null or GlobalValue should not be inserted");
394 
395  for (const Use &Us : Ptr->uses()) {
396  auto *U = dyn_cast<Instruction>(Us.getUser());
397  if (!U || U == LI || !DT.dominates(U, LI))
398  continue;
399 
400  // Bitcast or gep with zeros are using Ptr. Add to queue to check its
401  // users. U = bitcast Ptr
402  if (isa<BitCastInst>(U)) {
403  LoadOperandsQueue.push_back(U);
404  continue;
405  }
406  // Gep with zeros is equivalent to bitcast.
407  // FIXME: we are not sure if some bitcast should be canonicalized to gep 0
408  // or gep 0 to bitcast because of SROA, so there are 2 forms. When
409  // typeless pointers are ready, both cases will be gone
410  // (and this BFS also won't be needed).
411  if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
412  if (GEP->hasAllZeroIndices()) {
413  LoadOperandsQueue.push_back(U);
414  continue;
415  }
416 
417  // If we hit load/store with the same invariant.group metadata (and the
418  // same pointer operand) we can assume that value pointed by pointer
419  // operand didn't change.
420  if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
421  U->getMetadata(LLVMContext::MD_invariant_group) != nullptr)
422  ClosestDependency = GetClosestDependency(ClosestDependency, U);
423  }
424  }
425 
426  if (!ClosestDependency)
427  return MemDepResult::getUnknown();
428  if (ClosestDependency->getParent() == BB)
429  return MemDepResult::getDef(ClosestDependency);
430  // Def(U) can't be returned here because it is non-local. If a local
431  // dependency is not found, return nonLocal, counting on the user to call
432  // getNonLocalPointerDependency, which will return the cached
433  // result.
434  NonLocalDefsCache.try_emplace(
435  LI, NonLocalDepResult(ClosestDependency->getParent(),
436  MemDepResult::getDef(ClosestDependency), nullptr));
437  ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
438  return MemDepResult::getNonLocal();
439 }
440 
441 MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
442  const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
443  BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
444  bool isInvariantLoad = false;
445 
446  if (!Limit) {
447  unsigned DefaultLimit = BlockScanLimit;
448  return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
449  &DefaultLimit);
450  }
451 
452  // We must be careful with atomic accesses, as they may allow another thread
453  // to touch this location, clobbering it. We are conservative: if the
454  // QueryInst is not a simple (non-atomic) memory access, we automatically
455  // return getClobber.
456  // If it is simple, we know based on the results of
457  // "Compiler testing via a theory of sound optimisations in the C11/C++11
458  // memory model" in PLDI 2013, that a non-atomic location can only be
459  // clobbered between a pair of a release and an acquire action, with no
460  // access to the location in between.
461  // Here is an example for giving the general intuition behind this rule.
462  // In the following code:
463  // store x 0;
464  // release action; [1]
465  // acquire action; [4]
466  // %val = load x;
467  // It is unsafe to replace %val by 0 because another thread may be running:
468  // acquire action; [2]
469  // store x 42;
470  // release action; [3]
471  // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
472  // being 42. A key property of this program however is that if either
473  // 1 or 4 were missing, there would be a race between the store of 42
474  // and either the store of 0 or the load (making the whole program racy).
475  // The paper mentioned above shows that the same property is respected
476  // by every program that can detect any optimization of that kind: either
477  // it is racy (undefined) or there is a release followed by an acquire
478  // between the pair of accesses under consideration.
479 
480  // If the load is invariant, we "know" that it doesn't alias *any* write. We
481  // do want to respect mustalias results since defs are useful for value
482  // forwarding, but any mayalias write can be assumed to be noalias.
483  // Arguably, this logic should be pushed inside AliasAnalysis itself.
484  if (isLoad && QueryInst) {
485  LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
486  if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
487  isInvariantLoad = true;
488  }
489 
490  const DataLayout &DL = BB->getModule()->getDataLayout();
491 
492  // Create a numbered basic block to lazily compute and cache instruction
493  // positions inside a BB. This is used to provide fast queries for relative
494  // position between two instructions in a BB and can be used by
495  // AliasAnalysis::callCapturesBefore.
496  OrderedBasicBlock OBB(BB);
497 
498  // Return "true" if and only if the instruction I is either a non-simple
499  // load or a non-simple store.
500  auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool {
501  if (auto *LI = dyn_cast<LoadInst>(I))
502  return !LI->isSimple();
503  if (auto *SI = dyn_cast<StoreInst>(I))
504  return !SI->isSimple();
505  return false;
506  };
507 
508  // Return "true" if I is not a load and not a store, but it does access
509  // memory.
510  auto isOtherMemAccess = [](Instruction *I) -> bool {
511  return !isa<LoadInst>(I) && !isa<StoreInst>(I) && I->mayReadOrWriteMemory();
512  };
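 // These two predicates are used below to stay conservative: if the query
 // itself is atomic, volatile, or some other kind of memory access, ordered
 // or volatile instructions found while scanning are treated as clobbers.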
513 
514  // Walk backwards through the basic block, looking for dependencies.
515  while (ScanIt != BB->begin()) {
516  Instruction *Inst = &*--ScanIt;
517 
518  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
519  // Debug intrinsics don't (and can't) cause dependencies.
520  if (isa<DbgInfoIntrinsic>(II))
521  continue;
522 
523  // Limit the amount of scanning we do so we don't end up with quadratic
524  // running time on extreme testcases.
525  --*Limit;
526  if (!*Limit)
527  return MemDepResult::getUnknown();
528 
529  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
530  // If we reach a lifetime begin or end marker, then the query ends here
531  // because the value is undefined.
532  if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
533  // FIXME: This only considers queries directly on the invariant-tagged
534  // pointer, not on query pointers that are indexed off of them. It'd
535  // be nice to handle that at some point (the right approach is to use
536  // GetPointerBaseWithConstantOffset).
537  if (AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
538  return MemDepResult::getDef(II);
539  continue;
540  }
541  }
542 
543  // Values depend on loads if the pointers are must aliased. This means
544  // that a load depends on another must aliased load from the same value.
545  // One exception is atomic loads: a value can depend on an atomic load that
546  // it does not alias with when this atomic load indicates that another
547  // thread may be accessing the location.
548  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
549  // While volatile accesses cannot be eliminated, they do not have to clobber
550  // non-aliasing locations, as normal accesses, for example, can be safely
551  // reordered with volatile accesses.
552  if (LI->isVolatile()) {
553  if (!QueryInst)
554  // Original QueryInst *may* be volatile
555  return MemDepResult::getClobber(LI);
556  if (isVolatile(QueryInst))
557  // Ordering required if QueryInst is itself volatile
558  return MemDepResult::getClobber(LI);
559  // Otherwise, volatile doesn't imply any special ordering
560  }
561 
562  // Atomic loads have complications involved.
563  // A Monotonic (or higher) load is OK if the query inst is itself not
564  // atomic.
565  // FIXME: This is overly conservative.
566  if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
567  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
568  isOtherMemAccess(QueryInst))
569  return MemDepResult::getClobber(LI);
570  if (LI->getOrdering() != AtomicOrdering::Monotonic)
571  return MemDepResult::getClobber(LI);
572  }
573 
574  MemoryLocation LoadLoc = MemoryLocation::get(LI);
575 
576  // If we found a pointer, check if it could be the same as our pointer.
577  AliasResult R = AA.alias(LoadLoc, MemLoc);
578 
579  if (isLoad) {
580  if (R == NoAlias)
581  continue;
582 
583  // Must aliased loads are defs of each other.
584  if (R == MustAlias)
585  return MemDepResult::getDef(Inst);
586 
587 #if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
588  // in terms of clobbering loads, but since it does this by looking
589  // at the clobbering load directly, it doesn't know about any
590  // phi translation that may have happened along the way.
591 
592  // If we have a partial alias, then return this as a clobber for the
593  // client to handle.
594  if (R == PartialAlias)
595  return MemDepResult::getClobber(Inst);
596 #endif
597 
598  // Random may-alias loads don't depend on each other without a
599  // dependence.
600  continue;
601  }
602 
603  // Stores don't depend on other no-aliased accesses.
604  if (R == NoAlias)
605  continue;
606 
607  // Stores don't alias loads from read-only memory.
608  if (AA.pointsToConstantMemory(LoadLoc))
609  continue;
610 
611  // Stores depend on may/must aliased loads.
612  return MemDepResult::getDef(Inst);
613  }
614 
615  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
616  // Atomic stores have complications involved.
617  // A Monotonic store is OK if the query inst is itself not atomic.
618  // FIXME: This is overly conservative.
619  if (!SI->isUnordered() && SI->isAtomic()) {
620  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
621  isOtherMemAccess(QueryInst))
622  return MemDepResult::getClobber(SI);
623  if (SI->getOrdering() != AtomicOrdering::Monotonic)
624  return MemDepResult::getClobber(SI);
625  }
626 
627  // FIXME: this is overly conservative.
628  // While volatile accesses cannot be eliminated, they do not have to clobber
629  // non-aliasing locations, as normal accesses can for example be reordered
630  // with volatile accesses.
631  if (SI->isVolatile())
632  if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
633  isOtherMemAccess(QueryInst))
634  return MemDepResult::getClobber(SI);
635 
636  // If alias analysis can tell that this store is guaranteed to not modify
637  // the query pointer, ignore it. Use getModRefInfo to handle cases where
638  // the query pointer points to constant memory etc.
639  if (!isModOrRefSet(AA.getModRefInfo(SI, MemLoc)))
640  continue;
641 
642  // Ok, this store might clobber the query pointer. Check to see if it is
643  // a must alias: in this case, we want to return this as a def.
644  // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
645  MemoryLocation StoreLoc = MemoryLocation::get(SI);
646 
647  // If we found a pointer, check if it could be the same as our pointer.
648  AliasResult R = AA.alias(StoreLoc, MemLoc);
649 
650  if (R == NoAlias)
651  continue;
652  if (R == MustAlias)
653  return MemDepResult::getDef(Inst);
654  if (isInvariantLoad)
655  continue;
656  return MemDepResult::getClobber(Inst);
657  }
658 
659  // If this is an allocation, and if we know that the accessed pointer is to
660  // the allocation, return Def. This means that there is no dependence and
661  // the access can be optimized based on that. For example, a load could
662  // turn into undef. Note that we can bypass the allocation itself when
663  // looking for a clobber in many cases; that's an alias property and is
664  // handled by BasicAA.
665  if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, &TLI)) {
666  const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
667  if (AccessPtr == Inst || AA.isMustAlias(Inst, AccessPtr))
668  return MemDepResult::getDef(Inst);
669  }
670 
671  if (isInvariantLoad)
672  continue;
673 
674  // A release fence requires that all stores complete before it, but does
675  // not prevent the reordering of following loads or stores 'before' the
676  // fence. As a result, we look past it when finding a dependency for
677  // loads. DSE uses this to find preceding stores to delete and thus we
678  // can't bypass the fence if the query instruction is a store.
679  if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
680  if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
681  continue;
682 
683  // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
684  ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc);
685  // If necessary, perform additional analysis.
686  if (isModAndRefSet(MR))
687  MR = AA.callCapturesBefore(Inst, MemLoc, &DT, &OBB);
688  switch (clearMust(MR)) {
689  case ModRefInfo::NoModRef:
690  // If the call has no effect on the queried pointer, just ignore it.
691  continue;
692  case ModRefInfo::Mod:
693  return MemDepResult::getClobber(Inst);
694  case ModRefInfo::Ref:
695  // If the call is known to never store to the pointer, and if this is a
696  // load query, we can safely ignore it (scan past it).
697  if (isLoad)
698  continue;
699  LLVM_FALLTHROUGH;
700  default:
701  // Otherwise, there is a potential dependence. Return a clobber.
702  return MemDepResult::getClobber(Inst);
703  }
704  }
705 
706  // No dependence found. If this is the entry block of the function, it is
707  // unknown, otherwise it is non-local.
708  if (BB != &BB->getParent()->getEntryBlock())
709  return MemDepResult::getNonLocal();
710  return MemDepResult::getNonFuncLocal();
711 }
712 
713 MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
714  Instruction *ScanPos = QueryInst;
715 
716  // Check for a cached result
717  MemDepResult &LocalCache = LocalDeps[QueryInst];
718 
719  // If the cached entry is non-dirty, just return it. Note that this depends
720  // on MemDepResult's default constructing to 'dirty'.
721  if (!LocalCache.isDirty())
722  return LocalCache;
723 
724  // Otherwise, if we have a dirty entry, we know we can start the scan at that
725  // instruction, which may save us some work.
726  if (Instruction *Inst = LocalCache.getInst()) {
727  ScanPos = Inst;
728 
729  RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
730  }
731 
732  BasicBlock *QueryParent = QueryInst->getParent();
733 
734  // Do the scan.
735  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
736  // No dependence found. If this is the entry block of the function, it is
737  // unknown, otherwise it is non-local.
738  if (QueryParent != &QueryParent->getParent()->getEntryBlock())
739  LocalCache = MemDepResult::getNonLocal();
740  else
741  LocalCache = MemDepResult::getNonFuncLocal();
742  } else {
743  MemoryLocation MemLoc;
744  ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
745  if (MemLoc.Ptr) {
746  // If we can do a pointer scan, make it happen.
747  bool isLoad = !isModSet(MR);
748  if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
749  isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
750 
751  LocalCache = getPointerDependencyFrom(
752  MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
753  } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
754  CallSite QueryCS(QueryInst);
755  bool isReadOnly = AA.onlyReadsMemory(QueryCS);
756  LocalCache = getCallSiteDependencyFrom(
757  QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent);
758  } else
759  // Non-memory instruction.
760  LocalCache = MemDepResult::getUnknown();
761  }
762 
763  // Remember the result!
764  if (Instruction *I = LocalCache.getInst())
765  ReverseLocalDeps[I].insert(QueryInst);
766 
767  return LocalCache;
768 }
769 
770 #ifndef NDEBUG
771 /// This method is used when -debug is specified to verify that cache arrays
772 /// are properly kept sorted.
773 static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
774  int Count = -1) {
775  if (Count == -1)
776  Count = Cache.size();
777  assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
778  "Cache isn't sorted!");
779 }
780 #endif
781 
782 const MemoryDependenceResults::NonLocalDepInfo &
783 MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
784  assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
785  "getNonLocalCallDependency should only be used on calls with "
786  "non-local deps!");
787  PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
788  NonLocalDepInfo &Cache = CacheP.first;
789 
790  // This is the set of blocks that need to be recomputed. In the cached case,
791  // this can happen due to instructions being deleted etc. In the uncached
792  // case, this starts out as the set of predecessors we care about.
793  SmallVector<BasicBlock *, 32> DirtyBlocks;
794 
795  if (!Cache.empty()) {
796  // Okay, we have a cache entry. If we know it is not dirty, just return it
797  // with no computation.
798  if (!CacheP.second) {
799  ++NumCacheNonLocal;
800  return Cache;
801  }
802 
803  // If we already have a partially computed set of results, scan them to
804  // determine what is dirty, seeding our initial DirtyBlocks worklist.
805  for (auto &Entry : Cache)
806  if (Entry.getResult().isDirty())
807  DirtyBlocks.push_back(Entry.getBB());
808 
809  // Sort the cache so that we can do fast binary search lookups below.
810  llvm::sort(Cache);
811 
812  ++NumCacheDirtyNonLocal;
813  // cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
814  // << Cache.size() << " cached: " << *QueryInst;
815  } else {
816  // Seed DirtyBlocks with each of the preds of QueryInst's block.
817  BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
818  for (BasicBlock *Pred : PredCache.get(QueryBB))
819  DirtyBlocks.push_back(Pred);
820  ++NumUncacheNonLocal;
821  }
822 
823  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
824  bool isReadonlyCall = AA.onlyReadsMemory(QueryCS);
825 
826  SmallPtrSet<BasicBlock *, 32> Visited;
827 
828  unsigned NumSortedEntries = Cache.size();
829  LLVM_DEBUG(AssertSorted(Cache));
830 
831  // Iterate while we still have blocks to update.
832  while (!DirtyBlocks.empty()) {
833  BasicBlock *DirtyBB = DirtyBlocks.back();
834  DirtyBlocks.pop_back();
835 
836  // Already processed this block?
837  if (!Visited.insert(DirtyBB).second)
838  continue;
839 
840  // Do a binary search to see if we already have an entry for this block in
841  // the cache set. If so, find it.
842  LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
843  NonLocalDepInfo::iterator Entry =
844  std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
845  NonLocalDepEntry(DirtyBB));
846  if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
847  --Entry;
848 
849  NonLocalDepEntry *ExistingResult = nullptr;
850  if (Entry != Cache.begin() + NumSortedEntries &&
851  Entry->getBB() == DirtyBB) {
852  // If we already have an entry, and if it isn't already dirty, the block
853  // is done.
854  if (!Entry->getResult().isDirty())
855  continue;
856 
857  // Otherwise, remember this slot so we can update the value.
858  ExistingResult = &*Entry;
859  }
860 
861  // If the dirty entry has a pointer, start scanning from it so we don't have
862  // to rescan the entire block.
863  BasicBlock::iterator ScanPos = DirtyBB->end();
864  if (ExistingResult) {
865  if (Instruction *Inst = ExistingResult->getResult().getInst()) {
866  ScanPos = Inst->getIterator();
867  // We're removing QueryInst's use of Inst.
868  RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
869  QueryCS.getInstruction());
870  }
871  }
872 
873  // Find out if this block has a local dependency for QueryInst.
874  MemDepResult Dep;
875 
876  if (ScanPos != DirtyBB->begin()) {
877  Dep =
878  getCallSiteDependencyFrom(QueryCS, isReadonlyCall, ScanPos, DirtyBB);
879  } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
880  // No dependence found. If this is the entry block of the function, it is
881  // a clobber, otherwise it is unknown.
882  Dep = MemDepResult::getNonLocal();
883  } else {
884  Dep = MemDepResult::getNonFuncLocal();
885  }
886 
887  // If we had a dirty entry for the block, update it. Otherwise, just add
888  // a new entry.
889  if (ExistingResult)
890  ExistingResult->setResult(Dep);
891  else
892  Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
893 
894  // If the block has a dependency (i.e. it isn't completely transparent to
895  // the value), remember the association!
896  if (!Dep.isNonLocal()) {
897  // Keep the ReverseNonLocalDeps map up to date so we can efficiently
898  // update this when we remove instructions.
899  if (Instruction *Inst = Dep.getInst())
900  ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
901  } else {
902 
903  // If the block *is* completely transparent to the load, we need to check
904  // the predecessors of this block. Add them to our worklist.
905  for (BasicBlock *Pred : PredCache.get(DirtyBB))
906  DirtyBlocks.push_back(Pred);
907  }
908  }
909 
910  return Cache;
911 }
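// Note: this interface is only meaningful after getDependency() has reported
// a non-local result for the call (see the assert above); GVN's value
// numbering of calls is a typical client.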
912 
913 void MemoryDependenceResults::getNonLocalPointerDependency(
914  Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
915  const MemoryLocation Loc = MemoryLocation::get(QueryInst);
916  bool isLoad = isa<LoadInst>(QueryInst);
917  BasicBlock *FromBB = QueryInst->getParent();
918  assert(FromBB);
919 
920  assert(Loc.Ptr->getType()->isPointerTy() &&
921  "Can't get pointer deps of a non-pointer!");
922  Result.clear();
923  {
924  // Check if there is cached Def with invariant.group.
925  auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
926  if (NonLocalDefIt != NonLocalDefsCache.end()) {
927  Result.push_back(NonLocalDefIt->second);
928  ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
929  .erase(QueryInst);
930  NonLocalDefsCache.erase(NonLocalDefIt);
931  return;
932  }
933  }
934  // This routine does not expect to deal with volatile instructions.
935  // Doing so would require piping through the QueryInst all the way through.
936  // TODO: volatiles can't be elided, but they can be reordered with other
937  // non-volatile accesses.
938 
939  // We currently give up on any instruction which is ordered, but we do handle
940  // atomic instructions which are unordered.
941  // TODO: Handle ordered instructions
942  auto isOrdered = [](Instruction *Inst) {
943  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
944  return !LI->isUnordered();
945  } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
946  return !SI->isUnordered();
947  }
948  return false;
949  };
950  if (isVolatile(QueryInst) || isOrdered(QueryInst)) {
951  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
952  const_cast<Value *>(Loc.Ptr)));
953  return;
954  }
955  const DataLayout &DL = FromBB->getModule()->getDataLayout();
956  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
957 
958  // This is the set of blocks we've inspected, and the pointer we consider in
959  // each block. Because of critical edges, we currently bail out if querying
960  // a block with multiple different pointers. This can happen during PHI
961  // translation.
962  DenseMap<BasicBlock *, Value *> Visited;
963  if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
964  Result, Visited, true))
965  return;
966  Result.clear();
967  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
968  const_cast<Value *>(Loc.Ptr)));
969 }
970 
971 /// Compute the memdep value for BB with Pointer/PointeeSize using either
972 /// cached information in Cache or by doing a lookup (which may use dirty cache
973 /// info if available).
974 ///
975 /// If we do a lookup, add the result to the cache.
976 MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
977  Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
978  BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
979 
980  // Do a binary search to see if we already have an entry for this block in
981  // the cache set. If so, find it.
982  NonLocalDepInfo::iterator Entry = std::upper_bound(
983  Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
984  if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
985  --Entry;
986 
987  NonLocalDepEntry *ExistingResult = nullptr;
988  if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
989  ExistingResult = &*Entry;
990 
991  // If we have a cached entry, and it is non-dirty, use it as the value for
992  // this dependency.
993  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
994  ++NumCacheNonLocalPtr;
995  return ExistingResult->getResult();
996  }
997 
998  // Otherwise, we have to scan for the value. If we have a dirty cache
999  // entry, start scanning from its position, otherwise we scan from the end
1000  // of the block.
1001  BasicBlock::iterator ScanPos = BB->end();
1002  if (ExistingResult && ExistingResult->getResult().getInst()) {
1003  assert(ExistingResult->getResult().getInst()->getParent() == BB &&
1004  "Instruction invalidated?");
1005  ++NumCacheDirtyNonLocalPtr;
1006  ScanPos = ExistingResult->getResult().getInst()->getIterator();
1007 
1008  // Eliminating the dirty entry from 'Cache', so update the reverse info.
1009  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1010  RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
1011  } else {
1012  ++NumUncacheNonLocalPtr;
1013  }
1014 
1015  // Scan the block for the dependency.
1016  MemDepResult Dep =
1017  getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst);
1018 
1019  // If we had a dirty entry for the block, update it. Otherwise, just add
1020  // a new entry.
1021  if (ExistingResult)
1022  ExistingResult->setResult(Dep);
1023  else
1024  Cache->push_back(NonLocalDepEntry(BB, Dep));
1025 
1026  // If the block has a dependency (i.e. it isn't completely transparent to
1027  // the value), remember the reverse association because we just added it
1028  // to Cache!
1029  if (!Dep.isDef() && !Dep.isClobber())
1030  return Dep;
1031 
1032  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
1033  // update MemDep when we remove instructions.
1034  Instruction *Inst = Dep.getInst();
1035  assert(Inst && "Didn't depend on anything?");
1036  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1037  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
1038  return Dep;
1039 }
1040 
1041 /// Sort the NonLocalDepInfo cache, given a certain number of elements in the
1042 /// array that are already properly ordered.
1043 ///
1044 /// This is optimized for the case when only a few entries are added.
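/// (One or two unsorted entries at the tail are inserted into place directly;
/// anything more falls back to a full llvm::sort of the cache.)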
1045 static void
1046 SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
1047  unsigned NumSortedEntries) {
1048  switch (Cache.size() - NumSortedEntries) {
1049  case 0:
1050  // done, no new entries.
1051  break;
1052  case 2: {
1053  // Two new entries, insert the last one into place.
1054  NonLocalDepEntry Val = Cache.back();
1055  Cache.pop_back();
1056  MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1057  std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
1058  Cache.insert(Entry, Val);
1059  LLVM_FALLTHROUGH;
1060  }
1061  case 1:
1062  // One new entry, Just insert the new value at the appropriate position.
1063  if (Cache.size() != 1) {
1064  NonLocalDepEntry Val = Cache.back();
1065  Cache.pop_back();
1066  MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1067  std::upper_bound(Cache.begin(), Cache.end(), Val);
1068  Cache.insert(Entry, Val);
1069  }
1070  break;
1071  default:
1072  // Added many values, do a full scale sort.
1073  llvm::sort(Cache);
1074  break;
1075  }
1076 }
1077 
1078 /// Perform a dependency query based on pointer/pointeesize starting at the end
1079 /// of StartBB.
1080 ///
1081 /// Add any clobber/def results to the results vector and keep track of which
1082 /// blocks are visited in 'Visited'.
1083 ///
1084 /// This has special behavior for the first block queries (when SkipFirstBlock
1085 /// is true). In this special case, it ignores the contents of the specified
1086 /// block and starts returning dependence info for its predecessors.
1087 ///
1088 /// This function returns true on success, or false to indicate that it could
1089 /// not compute dependence information for some reason. This should be treated
1090 /// as a clobber dependence on the first instruction in the predecessor block.
1091 bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1092  Instruction *QueryInst, const PHITransAddr &Pointer,
1093  const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
1094  SmallVectorImpl<NonLocalDepResult> &Result,
1095  DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock) {
1096  // Look up the cached info for Pointer.
1097  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1098 
1099  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
1100  // CacheKey, this value will be inserted as the associated value. Otherwise,
1101  // it'll be ignored, and we'll have to check to see if the cached size and
1102  // aa tags are consistent with the current query.
1103  NonLocalPointerInfo InitialNLPI;
1104  InitialNLPI.Size = Loc.Size;
1105  InitialNLPI.AATags = Loc.AATags;
1106 
1107  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
1108  // already have one.
1109  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1110  NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
1111  NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1112 
1113  // If we already have a cache entry for this CacheKey, we may need to do some
1114  // work to reconcile the cache entry and the current query.
1115  if (!Pair.second) {
1116  if (CacheInfo->Size != Loc.Size) {
1117  bool ThrowOutEverything;
1118  if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
1119  // FIXME: We may be able to do better in the face of results with mixed
1120  // precision. We don't appear to get them in practice, though, so just
1121  // be conservative.
1122  ThrowOutEverything =
1123  CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
1124  CacheInfo->Size.getValue() < Loc.Size.getValue();
1125  } else {
1126  // For our purposes, unknown size > all others.
1127  ThrowOutEverything = !Loc.Size.hasValue();
1128  }
1129 
1130  if (ThrowOutEverything) {
1131  // The query's Size is greater than the cached one. Throw out the
1132  // cached data and proceed with the query at the greater size.
1133  CacheInfo->Pair = BBSkipFirstBlockPair();
1134  CacheInfo->Size = Loc.Size;
1135  for (auto &Entry : CacheInfo->NonLocalDeps)
1136  if (Instruction *Inst = Entry.getResult().getInst())
1137  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1138  CacheInfo->NonLocalDeps.clear();
1139  } else {
1140  // This query's Size is less than the cached one. Conservatively restart
1141  // the query using the greater size.
1142  return getNonLocalPointerDepFromBB(
1143  QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
1144  StartBB, Result, Visited, SkipFirstBlock);
1145  }
1146  }
1147 
1148  // If the query's AATags are inconsistent with the cached one,
1149  // conservatively throw out the cached data and restart the query with
1150  // no tag if needed.
1151  if (CacheInfo->AATags != Loc.AATags) {
1152  if (CacheInfo->AATags) {
1153  CacheInfo->Pair = BBSkipFirstBlockPair();
1154  CacheInfo->AATags = AAMDNodes();
1155  for (auto &Entry : CacheInfo->NonLocalDeps)
1156  if (Instruction *Inst = Entry.getResult().getInst())
1157  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1158  CacheInfo->NonLocalDeps.clear();
1159  }
1160  if (Loc.AATags)
1161  return getNonLocalPointerDepFromBB(
1162  QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
1163  Visited, SkipFirstBlock);
1164  }
1165  }
1166 
1167  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1168 
1169  // If we have valid cached information for exactly the block we are
1170  // investigating, just return it with no recomputation.
1171  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1172  // We have a fully cached result for this query, so we can just return the
1173  // cached results and populate the visited set. However, we have to verify
1174  // that we don't already have conflicting results for these blocks. Check
1175  // to ensure that if a block in the results set is in the visited set that
1176  // it was for the same pointer query.
1177  if (!Visited.empty()) {
1178  for (auto &Entry : *Cache) {
1179  DenseMap<BasicBlock *, Value *>::iterator VI =
1180  Visited.find(Entry.getBB());
1181  if (VI == Visited.end() || VI->second == Pointer.getAddr())
1182  continue;
1183 
1184  // We have a pointer mismatch in a block. Just return false, saying
1185  // that something was clobbered in this result. We could also do a
1186  // non-fully cached query, but there is little point in doing this.
1187  return false;
1188  }
1189  }
1190 
1191  Value *Addr = Pointer.getAddr();
1192  for (auto &Entry : *Cache) {
1193  Visited.insert(std::make_pair(Entry.getBB(), Addr));
1194  if (Entry.getResult().isNonLocal()) {
1195  continue;
1196  }
1197 
1198  if (DT.isReachableFromEntry(Entry.getBB())) {
1199  Result.push_back(
1200  NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
1201  }
1202  }
1203  ++NumCacheCompleteNonLocalPtr;
1204  return true;
1205  }
1206 
1207  // Otherwise, either this is a new block, a block with an invalid cache
1208  // pointer or one that we're about to invalidate by putting more info into it
1209  // than its valid cache info. If empty, the result will be valid cache info,
1210  // otherwise it isn't.
1211  if (Cache->empty())
1212  CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1213  else
1214  CacheInfo->Pair = BBSkipFirstBlockPair();
1215 
1216  SmallVector<BasicBlock *, 32> Worklist;
1217  Worklist.push_back(StartBB);
1218 
1219  // PredList used inside loop.
1220  SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;
1221 
1222  // Keep track of the entries that we know are sorted. Previously cached
1223  // entries will all be sorted. The entries we add we only sort on demand (we
1224  // don't insert every element into its sorted position). We know that we
1225  // won't get any reuse from currently inserted values, because we don't
1226  // revisit blocks after we insert info for them.
1227  unsigned NumSortedEntries = Cache->size();
1228  unsigned WorklistEntries = BlockNumberLimit;
1229  bool GotWorklistLimit = false;
1230  LLVM_DEBUG(AssertSorted(*Cache));
1231 
1232  while (!Worklist.empty()) {
1233  BasicBlock *BB = Worklist.pop_back_val();
1234 
1235  // If we do process a large number of blocks it becomes very expensive and
1236  // likely it isn't worth worrying about
1237  if (Result.size() > NumResultsLimit) {
1238  Worklist.clear();
1239  // Sort it now (if needed) so that recursive invocations of
1240  // getNonLocalPointerDepFromBB and other routines that could reuse the
1241  // cache value will only see properly sorted cache arrays.
1242  if (Cache && NumSortedEntries != Cache->size()) {
1243  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1244  }
1245  // Since we bail out, the "Cache" set won't contain all of the
1246  // results for the query. This is ok (we can still use it to accelerate
1247  // specific block queries) but we can't do the fastpath "return all
1248  // results from the set". Clear out the indicator for this.
1249  CacheInfo->Pair = BBSkipFirstBlockPair();
1250  return false;
1251  }
1252 
1253  // Skip the first block if we have it.
1254  if (!SkipFirstBlock) {
1255  // Analyze the dependency of *Pointer in FromBB. See if we already have
1256  // been here.
1257  assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
1258 
1259  // Get the dependency info for Pointer in BB. If we have cached
1260  // information, we will use it, otherwise we compute it.
1261  LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
1262  MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, Loc, isLoad, BB,
1263  Cache, NumSortedEntries);
1264 
1265  // If we got a Def or Clobber, add this to the list of results.
1266  if (!Dep.isNonLocal()) {
1267  if (DT.isReachableFromEntry(BB)) {
1268  Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
1269  continue;
1270  }
1271  }
1272  }
1273 
1274  // If 'Pointer' is an instruction defined in this block, then we need to do
1275  // phi translation to change it into a value live in the predecessor block.
1276  // If not, we just add the predecessors to the worklist and scan them with
1277  // the same Pointer.
1278  if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
1279  SkipFirstBlock = false;
1280  SmallVector<BasicBlock *, 16> NewBlocks;
1281  for (BasicBlock *Pred : PredCache.get(BB)) {
1282  // Verify that we haven't looked at this block yet.
1283  std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1284  Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
1285  if (InsertRes.second) {
1286  // First time we've looked at *PI.
1287  NewBlocks.push_back(Pred);
1288  continue;
1289  }
1290 
1291  // If we have seen this block before, but it was with a different
1292  // pointer then we have a phi translation failure and we have to treat
1293  // this as a clobber.
1294  if (InsertRes.first->second != Pointer.getAddr()) {
1295  // Make sure to clean up the Visited map before continuing on to
1296  // PredTranslationFailure.
1297  for (unsigned i = 0; i < NewBlocks.size(); i++)
1298  Visited.erase(NewBlocks[i]);
1299  goto PredTranslationFailure;
1300  }
1301  }
1302  if (NewBlocks.size() > WorklistEntries) {
1303  // Make sure to clean up the Visited map before continuing on to
1304  // PredTranslationFailure.
1305  for (unsigned i = 0; i < NewBlocks.size(); i++)
1306  Visited.erase(NewBlocks[i]);
1307  GotWorklistLimit = true;
1308  goto PredTranslationFailure;
1309  }
1310  WorklistEntries -= NewBlocks.size();
1311  Worklist.append(NewBlocks.begin(), NewBlocks.end());
1312  continue;
1313  }
1314 
1315  // We do need to do phi translation, if we know ahead of time we can't phi
1316  // translate this value, don't even try.
1317  if (!Pointer.IsPotentiallyPHITranslatable())
1318  goto PredTranslationFailure;
1319 
1320  // We may have added values to the cache list before this PHI translation.
1321  // If so, we haven't done anything to ensure that the cache remains sorted.
1322  // Sort it now (if needed) so that recursive invocations of
1323  // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1324  // value will only see properly sorted cache arrays.
1325  if (Cache && NumSortedEntries != Cache->size()) {
1326  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1327  NumSortedEntries = Cache->size();
1328  }
1329  Cache = nullptr;
1330 
1331  PredList.clear();
1332  for (BasicBlock *Pred : PredCache.get(BB)) {
1333  PredList.push_back(std::make_pair(Pred, Pointer));
1334 
1335  // Get the PHI translated pointer in this predecessor. This can fail if
1336  // not translatable, in which case the getAddr() returns null.
1337  PHITransAddr &PredPointer = PredList.back().second;
1338  PredPointer.PHITranslateValue(BB, Pred, &DT, /*MustDominate=*/false);
1339  Value *PredPtrVal = PredPointer.getAddr();
1340 
1341  // Check to see if we have already visited this pred block with another
1342  // pointer. If so, we can't do this lookup. This failure can occur
1343  // with PHI translation when a critical edge exists and the PHI node in
1344  // the successor translates to a pointer value different than the
1345  // pointer the block was first analyzed with.
1346  std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1347  Visited.insert(std::make_pair(Pred, PredPtrVal));
1348 
1349  if (!InsertRes.second) {
1350  // We found the pred; take it off the list of preds to visit.
1351  PredList.pop_back();
1352 
1353  // If the predecessor was visited with PredPtr, then we already did
1354  // the analysis and can ignore it.
1355  if (InsertRes.first->second == PredPtrVal)
1356  continue;
1357 
1358  // Otherwise, the block was previously analyzed with a different
1359  // pointer. We can't represent the result of this case, so we just
1360  // treat this as a phi translation failure.
1361 
1362  // Make sure to clean up the Visited map before continuing on to
1363  // PredTranslationFailure.
1364  for (unsigned i = 0, n = PredList.size(); i < n; ++i)
1365  Visited.erase(PredList[i].first);
1366 
1367  goto PredTranslationFailure;
1368  }
1369  }
1370 
1371  // Actually process results here; this needs to be a separate loop to avoid
1372  // calling getNonLocalPointerDepFromBB for blocks we don't want to return
1373  // any results for. (getNonLocalPointerDepFromBB will modify our
1374  // datastructures in ways the code after the PredTranslationFailure label
1375  // doesn't expect.)
1376  for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
1377  BasicBlock *Pred = PredList[i].first;
1378  PHITransAddr &PredPointer = PredList[i].second;
1379  Value *PredPtrVal = PredPointer.getAddr();
1380 
1381  bool CanTranslate = true;
1382  // If PHI translation was unable to find an available pointer in this
1383  // predecessor, then we have to assume that the pointer is clobbered in
1384  // that predecessor. We can still do PRE of the load, which would insert
1385  // a computation of the pointer in this predecessor.
1386  if (!PredPtrVal)
1387  CanTranslate = false;
1388 
1389  // FIXME: it is entirely possible that PHI translating will end up with
1390  // the same value. Consider PHI translating something like:
1391  // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
1392  // to recurse here, pedantically speaking.
1393 
1394  // If getNonLocalPointerDepFromBB fails here, that means the cached
1395  // result conflicted with the Visited list; we have to conservatively
1396  // assume it is unknown, but this also does not block PRE of the load.
1397  if (!CanTranslate ||
1398  !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1399  Loc.getWithNewPtr(PredPtrVal), isLoad,
1400  Pred, Result, Visited)) {
1401  // Add the entry to the Result list.
1402  NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
1403  Result.push_back(Entry);
1404 
1405  // Since we had a phi translation failure, the cache for CacheKey won't
1406  // include all of the entries that we need to immediately satisfy future
1407  // queries. Mark this in NonLocalPointerDeps by setting the
1408  // BBSkipFirstBlockPair pointer to null. This requires reuse of the
1409  // cached value to do more work but not miss the phi trans failure.
1410  NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1411  NLPI.Pair = BBSkipFirstBlockPair();
1412  continue;
1413  }
1414  }
1415 
1416  // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1417  CacheInfo = &NonLocalPointerDeps[CacheKey];
1418  Cache = &CacheInfo->NonLocalDeps;
1419  NumSortedEntries = Cache->size();
1420 
1421  // Since we did phi translation, the "Cache" set won't contain all of the
1422  // results for the query. This is ok (we can still use it to accelerate
1423  // specific block queries) but we can't do the fastpath "return all
1424  // results from the set" Clear out the indicator for this.
1425  CacheInfo->Pair = BBSkipFirstBlockPair();
1426  SkipFirstBlock = false;
1427  continue;
1428 
1429  PredTranslationFailure:
1430  // The following code is "failure"; we can't produce a sane translation
1431  // for the given block. It assumes that we haven't modified any of
1432  // our datastructures while processing the current block.
1433 
1434  if (!Cache) {
1435  // Refresh the CacheInfo/Cache pointer if it got invalidated.
1436  CacheInfo = &NonLocalPointerDeps[CacheKey];
1437  Cache = &CacheInfo->NonLocalDeps;
1438  NumSortedEntries = Cache->size();
1439  }
1440 
1441  // Since we failed phi translation, the "Cache" set won't contain all of the
1442  // results for the query. This is ok (we can still use it to accelerate
1443  // specific block queries) but we can't do the fastpath "return all
1444  // results from the set". Clear out the indicator for this.
1445  CacheInfo->Pair = BBSkipFirstBlockPair();
1446 
1447  // If *nothing* works, mark the pointer as unknown.
1448  //
1449  // If this is the magic first block, return this as a clobber of the whole
1450  // incoming value. Since we can't phi translate to one of the predecessors,
1451  // we have to bail out.
1452  if (SkipFirstBlock)
1453  return false;
1454 
1455  bool foundBlock = false;
1456  for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
1457  if (I.getBB() != BB)
1458  continue;
1459 
1460  assert((GotWorklistLimit || I.getResult().isNonLocal() ||
1461  !DT.isReachableFromEntry(BB)) &&
1462  "Should only be here with transparent block");
1463  foundBlock = true;
1464  I.setResult(MemDepResult::getUnknown());
1465  Result.push_back(
1466  NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
1467  break;
1468  }
1469  (void)foundBlock; (void)GotWorklistLimit;
1470  assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
1471  }
1472 
1473  // Okay, we're done now. If we added new values to the cache, re-sort it.
1474  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1475  LLVM_DEBUG(AssertSorted(*Cache));
1476  return true;
1477 }
1478 
1479 /// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
1480 void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies(
1481  ValueIsLoadPair P) {
1482 
1483  // Most of the time this cache is empty.
1484  if (!NonLocalDefsCache.empty()) {
1485  auto it = NonLocalDefsCache.find(P.getPointer());
1486  if (it != NonLocalDefsCache.end()) {
1487  RemoveFromReverseMap(ReverseNonLocalDefsCache,
1488  it->second.getResult().getInst(), P.getPointer());
1489  NonLocalDefsCache.erase(it);
1490  }
1491 
1492  if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
1493  auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
1494  if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
1495  for (const auto &entry : toRemoveIt->second)
1496  NonLocalDefsCache.erase(entry);
1497  ReverseNonLocalDefsCache.erase(toRemoveIt);
1498  }
1499  }
1500  }
1501 
1502  CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
1503  if (It == NonLocalPointerDeps.end())
1504  return;
1505 
1506  // Remove all of the entries in the BB->val map. This involves removing
1507  // instructions from the reverse map.
1508  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
1509 
1510  for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
1511  Instruction *Target = PInfo[i].getResult().getInst();
1512  if (!Target)
1513  continue; // Ignore non-local dep results.
1514  assert(Target->getParent() == PInfo[i].getBB());
1515 
1516  // Eliminating the dirty entry from 'Cache', so update the reverse info.
1517  RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1518  }
1519 
1520  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1521  NonLocalPointerDeps.erase(It);
1522 }
1523 
1524 void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
1525  // If Ptr isn't really a pointer, just ignore it.
1526  if (!Ptr->getType()->isPointerTy())
1527  return;
1528  // Flush store info for the pointer.
1529  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1530  // Flush load info for the pointer.
1531  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1532  // Invalidate phis that use the pointer.
1533  PV.invalidateValue(Ptr);
1534 }
1535 
1536 void MemoryDependenceResults::invalidateCachedPredecessors() {
1537  PredCache.clear();
1538 }
1539 
1540 void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
1541  // Walk through the Non-local dependencies, removing this one as the value
1542  // for any cached queries.
1543  NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
1544  if (NLDI != NonLocalDeps.end()) {
1545  NonLocalDepInfo &BlockMap = NLDI->second.first;
1546  for (auto &Entry : BlockMap)
1547  if (Instruction *Inst = Entry.getResult().getInst())
1548  RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1549  NonLocalDeps.erase(NLDI);
1550  }
1551 
1552  // If we have a cached local dependence query for this instruction, remove it.
1553  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1554  if (LocalDepEntry != LocalDeps.end()) {
1555  // Remove us from DepInst's reverse set now that the local dep info is gone.
1556  if (Instruction *Inst = LocalDepEntry->second.getInst())
1557  RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1558 
1559  // Remove this local dependency info.
1560  LocalDeps.erase(LocalDepEntry);
1561  }
1562 
1563  // If we have any cached pointer dependencies on this instruction, remove
1564  // them. If the instruction has non-pointer type, then it can't be a pointer
1565  // base.
1566 
1567  // Remove it from both the load info and the store info. The instruction
1568  // can't be in either of these maps if it is non-pointer.
1569  if (RemInst->getType()->isPointerTy()) {
1570  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1571  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1572  }
1573 
1574  // Loop over all of the things that depend on the instruction we're removing.
1575  SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;
1576 
1577  // If we find RemInst as a clobber or Def in any of the maps for other values,
1578  // we need to replace its entry with a dirty version of the instruction after
1579  // it. If RemInst is a terminator, we use a null dirty value.
1580  //
1581  // Using a dirty version of the instruction after RemInst saves having to scan
1582  // the entire block to get to this point.
1583  MemDepResult NewDirtyVal;
1584  if (!RemInst->isTerminator())
1585  NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
1586 
1587  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1588  if (ReverseDepIt != ReverseLocalDeps.end()) {
1589  // RemInst can't be the terminator if it has local stuff depending on it.
1590  assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
1591  "Nothing can locally depend on a terminator");
1592 
1593  for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
1594  assert(InstDependingOnRemInst != RemInst &&
1595  "Already removed our local dep info");
1596 
1597  LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1598 
1599  // Make sure to remember that new things depend on the new dirty instruction.
1600  assert(NewDirtyVal.getInst() &&
1601  "There is no way something else can have "
1602  "a local dep on this if it is a terminator!");
1603  ReverseDepsToAdd.push_back(
1604  std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
1605  }
1606 
1607  ReverseLocalDeps.erase(ReverseDepIt);
1608 
1609  // Add new reverse deps after scanning the set, to avoid invalidating the
1610  // 'ReverseDeps' reference.
1611  while (!ReverseDepsToAdd.empty()) {
1612  ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
1613  ReverseDepsToAdd.back().second);
1614  ReverseDepsToAdd.pop_back();
1615  }
1616  }
1617 
1618  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1619  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1620  for (Instruction *I : ReverseDepIt->second) {
1621  assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1622 
1623  PerInstNLInfo &INLD = NonLocalDeps[I];
1624  // The information is now dirty!
1625  INLD.second = true;
1626 
1627  for (auto &Entry : INLD.first) {
1628  if (Entry.getResult().getInst() != RemInst)
1629  continue;
1630 
1631  // Convert to a dirty entry for the subsequent instruction.
1632  Entry.setResult(NewDirtyVal);
1633 
1634  if (Instruction *NextI = NewDirtyVal.getInst())
1635  ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
1636  }
1637  }
1638 
1639  ReverseNonLocalDeps.erase(ReverseDepIt);
1640 
1641  // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
1642  while (!ReverseDepsToAdd.empty()) {
1643  ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
1644  ReverseDepsToAdd.back().second);
1645  ReverseDepsToAdd.pop_back();
1646  }
1647  }
1648 
1649  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1650  // value in the NonLocalPointerDeps info.
1651  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1652  ReverseNonLocalPtrDeps.find(RemInst);
1653  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1654  SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
1655  ReversePtrDepsToAdd;
1656 
1657  for (ValueIsLoadPair P : ReversePtrDepIt->second) {
1658  assert(P.getPointer() != RemInst &&
1659  "Already removed NonLocalPointerDeps info for RemInst");
1660 
1661  NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
1662 
1663  // The cache is not valid for any specific block anymore.
1664  NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
1665 
1666  // Update any entries for RemInst to use the instruction after it.
1667  for (auto &Entry : NLPDI) {
1668  if (Entry.getResult().getInst() != RemInst)
1669  continue;
1670 
1671  // Convert to a dirty entry for the subsequent instruction.
1672  Entry.setResult(NewDirtyVal);
1673 
1674  if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1675  ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1676  }
1677 
1678  // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
1679  // subsequent value may invalidate the sortedness.
1680  llvm::sort(NLPDI);
1681  }
1682 
1683  ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1684 
1685  while (!ReversePtrDepsToAdd.empty()) {
1686  ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
1687  ReversePtrDepsToAdd.back().second);
1688  ReversePtrDepsToAdd.pop_back();
1689  }
1690  }
1691 
1692  // Invalidate phis that use the removed instruction.
1693  PV.invalidateValue(RemInst);
1694 
1695  assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
1696  LLVM_DEBUG(verifyRemoved(RemInst));
1697 }
1698 
1699 /// Verify that the specified instruction does not occur in our internal data
1700 /// structures.
1701 ///
1702 /// This function verifies by asserting in debug builds.
1703 void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
1704 #ifndef NDEBUG
1705  for (const auto &DepKV : LocalDeps) {
1706  assert(DepKV.first != D && "Inst occurs in data structures");
1707  assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1708  }
1709 
1710  for (const auto &DepKV : NonLocalPointerDeps) {
1711  assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1712  for (const auto &Entry : DepKV.second.NonLocalDeps)
1713  assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1714  }
1715 
1716  for (const auto &DepKV : NonLocalDeps) {
1717  assert(DepKV.first != D && "Inst occurs in data structures");
1718  const PerInstNLInfo &INLD = DepKV.second;
1719  for (const auto &Entry : INLD.first)
1720  assert(Entry.getResult().getInst() != D &&
1721  "Inst occurs in data structures");
1722  }
1723 
1724  for (const auto &DepKV : ReverseLocalDeps) {
1725  assert(DepKV.first != D && "Inst occurs in data structures");
1726  for (Instruction *Inst : DepKV.second)
1727  assert(Inst != D && "Inst occurs in data structures");
1728  }
1729 
1730  for (const auto &DepKV : ReverseNonLocalDeps) {
1731  assert(DepKV.first != D && "Inst occurs in data structures");
1732  for (Instruction *Inst : DepKV.second)
1733  assert(Inst != D && "Inst occurs in data structures");
1734  }
1735 
1736  for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1737  assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1738 
1739  for (ValueIsLoadPair P : DepKV.second)
1740  assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1741  "Inst occurs in ReverseNonLocalPtrDeps map");
1742  }
1743 #endif
1744 }
1745 
1746 AnalysisKey MemoryDependenceAnalysis::Key;
1747 
1748 MemoryDependenceResults
1749 MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
1750  auto &AA = AM.getResult<AAManager>(F);
1751  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1752  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1753  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1754  auto &PV = AM.getResult<PhiValuesAnalysis>(F);
1755  return MemoryDependenceResults(AA, AC, TLI, DT, PV);
1756 }
1757 
1758 char MemoryDependenceWrapperPass::ID = 0;
1759 
1760 INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
1761  "Memory Dependence Analysis", false, true)
1762 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1763 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1764 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1765 INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass)
1766 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1767 INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
1768  "Memory Dependence Analysis", false, true)
1769 
1770 MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
1771  initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
1772 }
1773 
1774 MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
1775 
1776 void MemoryDependenceWrapperPass::releaseMemory() {
1777  MemDep.reset();
1778 }
1779 
1780 void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1781  AU.setPreservesAll();
1782  AU.addRequired<AssumptionCacheTracker>();
1783  AU.addRequired<DominatorTreeWrapperPass>();
1784  AU.addRequired<PhiValuesWrapperPass>();
1785  AU.addRequiredTransitive<AAResultsWrapperPass>();
1786  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
1787 }
1788 
1789 bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
1790  FunctionAnalysisManager::Invalidator &Inv) {
1791  // Check whether our analysis is preserved.
1792  auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1793  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1794  // If not, give up now.
1795  return true;
1796 
1797  // Check whether the analyses we depend on became invalid for any reason.
1798  if (Inv.invalidate<AAManager>(F, PA) ||
1799  Inv.invalidate<AssumptionAnalysis>(F, PA) ||
1800  Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
1801  Inv.invalidate<PhiValuesAnalysis>(F, PA))
1802  return true;
1803 
1804  // Otherwise this analysis result remains valid.
1805  return false;
1806 }
1807 
1808 unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
1809  return BlockScanLimit;
1810 }
1811 
1812 bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
1813  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1814  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1815  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1816  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1817  auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult();
1818  MemDep.emplace(AA, AC, TLI, DT, PV);
1819  return false;
1820 }
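
For readers coming to this listing from a transform pass, the following is a minimal sketch (not part of MemoryDependenceAnalysis.cpp) of how a client might consume this analysis under the legacy pass manager. The pass name MemDepClientExample and its body are illustrative assumptions; only MemoryDependenceWrapperPass, getMemDep(), and MemoryDependenceResults::getDependency() come from the interfaces shown above.

// Illustrative client only -- not part of this file.
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

namespace {
struct MemDepClientExample : public FunctionPass {
  static char ID; // Pass identification.
  MemDepClientExample() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Request memdep; it pulls in the alias/assumption/dominator analyses it needs.
    AU.addRequired<MemoryDependenceWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override {
    MemoryDependenceResults &MD =
        getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
    for (BasicBlock &BB : F)
      for (Instruction &I : BB)
        if (auto *LI = dyn_cast<LoadInst>(&I)) {
          // Local query: what does this load depend on within its block?
          MemDepResult Dep = MD.getDependency(LI);
          if (Dep.isDef() || Dep.isClobber())
            (void)Dep.getInst(); // The defining or clobbering instruction.
        }
    return false; // Pure consumer; the IR is not modified.
  }
};
} // end anonymous namespace

char MemDepClientExample::ID = 0;

Under the new pass manager, the corresponding entry point is MemoryDependenceAnalysis: its run() method (defined above) builds MemoryDependenceResults from the AAManager, AssumptionAnalysis, TargetLibraryAnalysis, DominatorTreeAnalysis, and PhiValuesAnalysis results, so a new-PM pass would instead obtain it via AM.getResult<MemoryDependenceAnalysis>(F).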