DeadStoreElimination.cpp
1 //===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The code below implements dead store elimination using MemorySSA. It uses
10 // the following general approach: given a MemoryDef, walk upwards to find
11 // clobbering MemoryDefs that may be killed by the starting def. Then check
12 // that there are no uses that may read the location of the original MemoryDef
13 // in between both MemoryDefs. A bit more concretely:
14 //
15 // For all MemoryDefs StartDef:
16 // 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17 // upwards.
18 // 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19 // checking all uses starting at MaybeDeadAccess and walking until we see
20 // StartDef.
21 // 3. For each found CurrentDef, check that:
22 // 1. There are no barrier instructions between CurrentDef and StartDef (like
23 // throws or stores with ordering constraints).
24 // 2. StartDef is executed whenever CurrentDef is executed.
25 // 3. StartDef completely overwrites CurrentDef.
26 // 4. Erase CurrentDef from the function and MemorySSA.
27 //
28 //===----------------------------------------------------------------------===//
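//
// As an illustrative sketch (placeholder names, not taken from a real test
// case), in a sequence like
//
//   store i32 0, i32* %p   ; earlier MemoryDef, no reads of %p below
//   store i32 1, i32* %p   ; StartDef (killing store)
//
// walking upwards from the second store's MemoryDef finds the first store,
// proves there is no intervening read of %p, and erases the first store.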
29 
30 #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
31 #include "llvm/ADT/APInt.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/MapVector.h"
35 #include "llvm/ADT/SetVector.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallVector.h"
38 #include "llvm/ADT/Statistic.h"
39 #include "llvm/ADT/StringRef.h"
43 #include "llvm/Analysis/LoopInfo.h"
52 #include "llvm/IR/Argument.h"
53 #include "llvm/IR/BasicBlock.h"
54 #include "llvm/IR/Constant.h"
55 #include "llvm/IR/Constants.h"
56 #include "llvm/IR/DataLayout.h"
57 #include "llvm/IR/Dominators.h"
58 #include "llvm/IR/Function.h"
59 #include "llvm/IR/IRBuilder.h"
60 #include "llvm/IR/InstIterator.h"
61 #include "llvm/IR/InstrTypes.h"
62 #include "llvm/IR/Instruction.h"
63 #include "llvm/IR/Instructions.h"
64 #include "llvm/IR/IntrinsicInst.h"
65 #include "llvm/IR/Intrinsics.h"
66 #include "llvm/IR/LLVMContext.h"
67 #include "llvm/IR/Module.h"
68 #include "llvm/IR/PassManager.h"
69 #include "llvm/IR/PatternMatch.h"
70 #include "llvm/IR/Value.h"
71 #include "llvm/InitializePasses.h"
72 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/CommandLine.h"
75 #include "llvm/Support/Debug.h"
80 #include "llvm/Transforms/Scalar.h"
84 #include <algorithm>
85 #include <cassert>
86 #include <cstddef>
87 #include <cstdint>
88 #include <iterator>
89 #include <map>
90 #include <utility>
91 
92 using namespace llvm;
93 using namespace PatternMatch;
94 
95 #define DEBUG_TYPE "dse"
96 
97 STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
98 STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
99 STATISTIC(NumFastStores, "Number of stores deleted");
100 STATISTIC(NumFastOther, "Number of other instrs removed");
101 STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
102 STATISTIC(NumModifiedStores, "Number of stores modified");
103 STATISTIC(NumCFGChecks, "Number of blocks checked when proving stores dead on all paths");
104 STATISTIC(NumCFGTries, "Number of times a CFG-based dead-on-all-paths check was attempted");
105 STATISTIC(NumCFGSuccess, "Number of CFG-based dead-on-all-paths checks that succeeded");
106 STATISTIC(NumGetDomMemoryDefPassed,
107  "Number of times a valid candidate is returned from getDomMemoryDef");
108 STATISTIC(NumDomMemDefChecks,
109  "Number iterations check for reads in getDomMemoryDef");
110 
111 DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
112  "Controls which MemoryDefs are eliminated.");
113 
114 static cl::opt<bool>
115 EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
116  cl::init(true), cl::Hidden,
117  cl::desc("Enable partial-overwrite tracking in DSE"));
118 
119 static cl::opt<bool>
120 EnablePartialStoreMerging("enable-dse-partial-store-merging",
121  cl::init(true), cl::Hidden,
122  cl::desc("Enable partial store merging in DSE"));
123 
124 static cl::opt<unsigned>
125  MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
126  cl::desc("The number of memory instructions to scan for "
127  "dead store elimination (default = 150)"));
129  "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
130  cl::desc("The maximum number of steps while walking upwards to find "
131  "MemoryDefs that may be killed (default = 90)"));
132 
134  "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
135  cl::desc("The maximum number candidates that only partially overwrite the "
136  "killing MemoryDef to consider"
137  " (default = 5)"));
138 
140  "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
141  cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
142  "other stores per basic block (default = 5000)"));
143 
145  "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
146  cl::desc(
147  "The cost of a step in the same basic block as the killing MemoryDef"
148  "(default = 1)"));
149 
150 static cl::opt<unsigned>
151  MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
152  cl::Hidden,
153  cl::desc("The cost of a step in a different basic "
154  "block than the killing MemoryDef"
155  "(default = 5)"));
156 
158  "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
159  cl::desc("The maximum number of blocks to check when trying to prove that "
160  "all paths to an exit go through a killing block (default = 50)"));
161 
162 //===----------------------------------------------------------------------===//
163 // Helper functions
164 //===----------------------------------------------------------------------===//
165 using OverlapIntervalsTy = std::map<int64_t, int64_t>;
166 using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
167 
168 /// Does this instruction write some memory? This only returns true for things
169 /// that we can analyze with other helpers below.
170 static bool hasAnalyzableMemoryWrite(Instruction *I,
171  const TargetLibraryInfo &TLI) {
172  if (isa<StoreInst>(I))
173  return true;
174  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
175  switch (II->getIntrinsicID()) {
176  default:
177  return false;
178  case Intrinsic::memset:
179  case Intrinsic::memmove:
180  case Intrinsic::memcpy:
181  case Intrinsic::memcpy_inline:
182  case Intrinsic::memcpy_element_unordered_atomic:
183  case Intrinsic::memmove_element_unordered_atomic:
184  case Intrinsic::memset_element_unordered_atomic:
185  case Intrinsic::init_trampoline:
186  case Intrinsic::lifetime_end:
187  case Intrinsic::masked_store:
188  return true;
189  }
190  }
191  if (auto *CB = dyn_cast<CallBase>(I)) {
192  LibFunc LF;
193  if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) {
194  switch (LF) {
195  case LibFunc_strcpy:
196  case LibFunc_strncpy:
197  case LibFunc_strcat:
198  case LibFunc_strncat:
199  return true;
200  default:
201  return false;
202  }
203  }
204  }
205  return false;
206 }
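// As a sketch of the classification above: a plain store, the memset/memcpy/
// memmove intrinsics (including their element-atomic variants),
// llvm.init.trampoline, llvm.lifetime.end, a masked store, and the
// strcpy/strncpy/strcat/strncat libcalls are all analyzable writes; loads,
// fences and unknown calls are not handled here and return false.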
207 
208 /// Return a Location stored to by the specified instruction. If isRemovable
209 /// returns true, this function and getLocForRead completely describe the memory
210 /// operations for this instruction.
211 static MemoryLocation getLocForWrite(Instruction *Inst,
212  const TargetLibraryInfo &TLI) {
213  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
214  return MemoryLocation::get(SI);
215 
216  // memcpy/memmove/memset.
217  if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst))
218  return MemoryLocation::getForDest(MI);
219 
220  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
221  switch (II->getIntrinsicID()) {
222  default:
223  return MemoryLocation(); // Unhandled intrinsic.
224  case Intrinsic::init_trampoline:
225  return MemoryLocation::getAfter(II->getArgOperand(0));
226  case Intrinsic::masked_store:
227  return MemoryLocation::getForArgument(II, 1, TLI);
228  case Intrinsic::lifetime_end: {
229  uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
230  return MemoryLocation(II->getArgOperand(1), Len);
231  }
232  }
233  }
234  if (auto *CB = dyn_cast<CallBase>(Inst))
235  // All the supported TLI functions so far happen to have dest as their
236  // first argument.
237  return MemoryLocation::getAfter(CB->getArgOperand(0));
238  return MemoryLocation();
239 }
240 
241 /// If the value of this instruction and the memory it writes to is unused, may
242 /// we delete this instruction?
243 static bool isRemovable(Instruction *I) {
244  // Don't remove volatile/atomic stores.
245  if (StoreInst *SI = dyn_cast<StoreInst>(I))
246  return SI->isUnordered();
247 
248  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
249  switch (II->getIntrinsicID()) {
250  default: llvm_unreachable("doesn't pass 'hasAnalyzableMemoryWrite' predicate");
251  case Intrinsic::lifetime_end:
252  // Never remove dead lifetime_end's, e.g. because it is followed by a
253  // free.
254  return false;
255  case Intrinsic::init_trampoline:
256  // Always safe to remove init_trampoline.
257  return true;
258  case Intrinsic::memset:
259  case Intrinsic::memmove:
260  case Intrinsic::memcpy:
261  case Intrinsic::memcpy_inline:
262  // Don't remove volatile memory intrinsics.
263  return !cast<MemIntrinsic>(II)->isVolatile();
264  case Intrinsic::memcpy_element_unordered_atomic:
265  case Intrinsic::memmove_element_unordered_atomic:
266  case Intrinsic::memset_element_unordered_atomic:
267  case Intrinsic::masked_store:
268  return true;
269  }
270  }
271 
272  // note: only get here for calls with analyzable writes - i.e. libcalls
273  if (auto *CB = dyn_cast<CallBase>(I))
274  return CB->use_empty();
275 
276  return false;
277 }
278 
279 /// Returns true if the end of this instruction can be safely shortened in
280 /// length.
281 static bool isShortenableAtTheEnd(Instruction *I) {
282  // Don't shorten stores for now
283  if (isa<StoreInst>(I))
284  return false;
285 
286  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
287  switch (II->getIntrinsicID()) {
288  default: return false;
289  case Intrinsic::memset:
290  case Intrinsic::memcpy:
291  case Intrinsic::memcpy_element_unordered_atomic:
292  case Intrinsic::memset_element_unordered_atomic:
293  // Do shorten memory intrinsics.
294  // FIXME: Add memmove if it's also safe to transform.
295  return true;
296  }
297  }
298 
299  // Don't shorten libcalls for now.
300 
301  return false;
302 }
303 
304 /// Returns true if the beginning of this instruction can be safely shortened
305 /// in length.
306 static bool isShortenableAtTheBeginning(Instruction *I) {
307  // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
308  // easily done by offsetting the source address.
309  return isa<AnyMemSetInst>(I);
310 }
311 
312 static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
313  const TargetLibraryInfo &TLI,
314  const Function *F) {
315  uint64_t Size;
316  ObjectSizeOpts Opts;
317  Opts.NullIsUnknownSize = NullPointerIsDefined(F);
318 
319  if (getObjectSize(V, Size, DL, &TLI, Opts))
320  return Size;
321  return MemoryLocation::UnknownSize;
322 }
323 
324 namespace {
325 
326 enum OverwriteResult {
327  OW_Begin,
328  OW_Complete,
329  OW_End,
330  OW_PartialEarlierWithFullLater,
331  OW_MaybePartial,
332  OW_Unknown
333 };
334 
335 } // end anonymous namespace
336 
337 /// Check if two instructions are masked stores that completely
338 /// overwrite one another. More specifically, \p KillingI has to
339 /// overwrite \p DeadI.
340 static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
341  const Instruction *DeadI,
342  BatchAAResults &AA) {
343  const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
344  const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
345  if (KillingII == nullptr || DeadII == nullptr)
346  return OW_Unknown;
347  if (KillingII->getIntrinsicID() != Intrinsic::masked_store ||
348  DeadII->getIntrinsicID() != Intrinsic::masked_store)
349  return OW_Unknown;
350  // Pointers.
351  Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
352  Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
353  if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
354  return OW_Unknown;
355  // Masks.
356  // TODO: check that KillingII's mask is a superset of the DeadII's mask.
357  if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
358  return OW_Unknown;
359  return OW_Complete;
360 }
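// For illustration (placeholder values %v1, %v2, %p and %m), two calls
//   llvm.masked.store(<4 x i32> %v1, <4 x i32>* %p, i32 4, <4 x i1> %m)
//   llvm.masked.store(<4 x i32> %v2, <4 x i32>* %p, i32 4, <4 x i1> %m)
// that share the pointer operand and the mask operand are reported as
// OW_Complete; any difference in pointer or mask conservatively yields
// OW_Unknown until mask-subset reasoning is implemented (see TODO above).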
361 
362 /// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
363 /// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
364 /// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
365 /// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
366 /// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
367 /// overwritten by a killing (smaller) store which doesn't write outside the big
368 /// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
369 /// NOTE: This function must only be called if both \p KillingLoc and \p
370 /// DeadLoc belong to the same underlying object with valid \p KillingOff and
371 /// \p DeadOff.
372 static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
373  const MemoryLocation &DeadLoc,
374  int64_t KillingOff, int64_t DeadOff,
375  Instruction *DeadI,
376  InstOverlapIntervalsTy &IOL) {
377  const uint64_t KillingSize = KillingLoc.Size.getValue();
378  const uint64_t DeadSize = DeadLoc.Size.getValue();
379  // We may now overlap, although the overlap is not complete. There might also
380  // be other incomplete overlaps, and together, they might cover the complete
381  // dead store.
382  // Note: The correctness of this logic depends on the fact that this function
383  // is not even called providing DepWrite when there are any intervening reads.
384  if (EnablePartialOverwriteTracking &&
385  KillingOff < int64_t(DeadOff + DeadSize) &&
386  int64_t(KillingOff + KillingSize) >= DeadOff) {
387 
388  // Insert our part of the overlap into the map.
389  auto &IM = IOL[DeadI];
390  LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
391  << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
392  << KillingOff << ", " << int64_t(KillingOff + KillingSize)
393  << ")\n");
394 
395  // Make sure that we only insert non-overlapping intervals and combine
396  // adjacent intervals. The intervals are stored in the map with the ending
397  // offset as the key (in the half-open sense) and the starting offset as
398  // the value.
399  int64_t KillingIntStart = KillingOff;
400  int64_t KillingIntEnd = KillingOff + KillingSize;
401 
402  // Find any intervals ending at, or after, KillingIntStart which start
403  // before KillingIntEnd.
404  auto ILI = IM.lower_bound(KillingIntStart);
405  if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
406  // This existing interval is overlapped with the current store somewhere
407  // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
408  // intervals and adjusting our start and end.
409  KillingIntStart = std::min(KillingIntStart, ILI->second);
410  KillingIntEnd = std::max(KillingIntEnd, ILI->first);
411  ILI = IM.erase(ILI);
412 
413  // Continue erasing and adjusting our end in case other previous
414  // intervals are also overlapped with the current store.
415  //
416  // |--- dead 1 ---| |--- dead 2 ---|
417  // |------- killing---------|
418  //
419  while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
420  assert(ILI->second > KillingIntStart && "Unexpected interval");
421  KillingIntEnd = std::max(KillingIntEnd, ILI->first);
422  ILI = IM.erase(ILI);
423  }
424  }
425 
426  IM[KillingIntEnd] = KillingIntStart;
427 
428  ILI = IM.begin();
429  if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
430  LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
431  << DeadOff << ", " << int64_t(DeadOff + DeadSize)
432  << ") Composite KillingLoc [" << ILI->second << ", "
433  << ILI->first << ")\n");
434  ++NumCompletePartials;
435  return OW_Complete;
436  }
437  }
438 
439  // Check for a dead store which writes to all the memory locations that
440  // the killing store writes to.
441  if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
442  int64_t(DeadOff + DeadSize) > KillingOff &&
443  uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
444  LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead store [" << DeadOff
445  << ", " << int64_t(DeadOff + DeadSize)
446  << ") by a killing store [" << KillingOff << ", "
447  << int64_t(KillingOff + KillingSize) << ")\n");
448  // TODO: Maybe come up with a better name?
449  return OW_PartialEarlierWithFullLater;
450  }
451 
452  // Another interesting case is if the killing store overwrites the end of the
453  // dead store.
454  //
455  // |--dead--|
456  // |-- killing --|
457  //
458  // In this case we may want to trim the size of the dead store to avoid
459  // generating stores to addresses which will definitely be overwritten by
460  // the killing store.
461  if (!EnablePartialOverwriteTracking &&
462  (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
463  int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
464  return OW_End;
465 
466  // Finally, we also need to check if the killing store overwrites the
467  // beginning of the dead store.
468  //
469  // |--dead--|
470  // |-- killing --|
471  //
472  // In this case we may want to move the destination address and trim the size
473  // of the dead store to avoid generating stores to addresses which will
474  // definitely be overwritten by the killing store.
475  if (!EnablePartialOverwriteTracking &&
476  (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
477  assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
478  "Expect to be handled as OW_Complete");
479  return OW_Begin;
480  }
481  // Otherwise, they don't completely overlap.
482  return OW_Unknown;
483 }
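// Worked example of the interval bookkeeping above (offsets are relative to
// the common base and purely illustrative): a dead store covers [4, 12). A
// first killing store covering [8, 16) is recorded as {16 -> 8} (end offset
// is the key, start offset the value). A second killing store covering
// [0, 9) overlaps that entry, so both are merged into {16 -> 0}. The merged
// interval [0, 16) now covers the whole dead range [4, 12), so the dead store
// is reported as OW_Complete via the "full overwrite from partials" path.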
484 
485 /// Returns true if the memory which is accessed by the second instruction is not
486 /// modified between the first and the second instruction.
487 /// Precondition: Second instruction must be dominated by the first
488 /// instruction.
489 static bool
490 memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
491  BatchAAResults &AA, const DataLayout &DL,
492  DominatorTree *DT) {
493  // Do a backwards scan through the CFG from SecondI to FirstI. Look for
494  // instructions which can modify the memory location accessed by SecondI.
495  //
496  // While doing the walk keep track of the address to check. It might be
497  // different in different basic blocks due to PHI translation.
498  using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
499  SmallVector<BlockAddressPair, 16> WorkList;
500  // Keep track of the address we visited each block with. Bail out if we
501  // visit a block with different addresses.
502  DenseMap<BasicBlock *, Value *> Visited;
503 
504  BasicBlock::iterator FirstBBI(FirstI);
505  ++FirstBBI;
506  BasicBlock::iterator SecondBBI(SecondI);
507  BasicBlock *FirstBB = FirstI->getParent();
508  BasicBlock *SecondBB = SecondI->getParent();
509  MemoryLocation MemLoc;
510  if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
511  MemLoc = MemoryLocation::getForDest(MemSet);
512  else
513  MemLoc = MemoryLocation::get(SecondI);
514 
515  auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
516 
517  // Start checking the SecondBB.
518  WorkList.push_back(
519  std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
520  bool isFirstBlock = true;
521 
522  // Check all blocks going backward until we reach the FirstBB.
523  while (!WorkList.empty()) {
524  BlockAddressPair Current = WorkList.pop_back_val();
525  BasicBlock *B = Current.first;
526  PHITransAddr &Addr = Current.second;
527  Value *Ptr = Addr.getAddr();
528 
529  // Ignore instructions before FirstI if this is the FirstBB.
530  BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
531 
532  BasicBlock::iterator EI;
533  if (isFirstBlock) {
534  // Ignore instructions after SecondI if this is the first visit of SecondBB.
535  assert(B == SecondBB && "first block is not the store block");
536  EI = SecondBBI;
537  isFirstBlock = false;
538  } else {
539  // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
540  // In this case we also have to look at instructions after SecondI.
541  EI = B->end();
542  }
543  for (; BI != EI; ++BI) {
544  Instruction *I = &*BI;
545  if (I->mayWriteToMemory() && I != SecondI)
546  if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
547  return false;
548  }
549  if (B != FirstBB) {
550  assert(B != &FirstBB->getParent()->getEntryBlock() &&
551  "Should not hit the entry block because SI must be dominated by LI");
552  for (BasicBlock *Pred : predecessors(B)) {
553  PHITransAddr PredAddr = Addr;
554  if (PredAddr.NeedsPHITranslationFromBlock(B)) {
555  if (!PredAddr.IsPotentiallyPHITranslatable())
556  return false;
557  if (PredAddr.PHITranslateValue(B, Pred, DT, false))
558  return false;
559  }
560  Value *TranslatedPtr = PredAddr.getAddr();
561  auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
562  if (!Inserted.second) {
563  // We already visited this block before. If it was with a different
564  // address - bail out!
565  if (TranslatedPtr != Inserted.first->second)
566  return false;
567  // ... otherwise just skip it.
568  continue;
569  }
570  WorkList.push_back(std::make_pair(Pred, PredAddr));
571  }
572  }
573  }
574  return true;
575 }
576 
577 static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
578  uint64_t &DeadSize, int64_t KillingStart,
579  uint64_t KillingSize, bool IsOverwriteEnd) {
580  auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
581  Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
582 
583  // We assume that memset/memcpy operates in chunks of the "largest" native
584  // type size and aligned on the same value. That means the optimal start and
585  // size of memset/memcpy should be modulo the preferred alignment of that
586  // type, i.e. there is no sense in trying to reduce the store size any
587  // further, since any "extra" stores come for free anyway.
588  // On the other hand, the maximum alignment we can achieve is limited by the
589  // alignment of the initial store.
590 
591  // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
592  // "largest" native type.
593  // Note: What is the proper way to get that value?
594  // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
595  // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
596 
597  int64_t ToRemoveStart = 0;
598  uint64_t ToRemoveSize = 0;
599  // Compute start and size of the region to remove. Make sure 'PrefAlign' is
600  // maintained on the remaining store.
601  if (IsOverwriteEnd) {
602  // Calculate required adjustment for 'KillingStart' in order to keep
603  // remaining store size aligned on 'PrefAlign'.
604  uint64_t Off =
605  offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
606  ToRemoveStart = KillingStart + Off;
607  if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
608  return false;
609  ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
610  } else {
611  ToRemoveStart = DeadStart;
612  assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
613  "Not overlapping accesses?");
614  ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
615  // Calculate required adjustment for 'ToRemoveSize' in order to keep the
616  // start of the remaining store aligned on 'PrefAlign'.
617  uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
618  if (Off != 0) {
619  if (ToRemoveSize <= (PrefAlign.value() - Off))
620  return false;
621  ToRemoveSize -= PrefAlign.value() - Off;
622  }
623  assert(isAligned(PrefAlign, ToRemoveSize) &&
624  "Should preserve selected alignment");
625  }
626 
627  assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
628  assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
629 
630  uint64_t NewSize = DeadSize - ToRemoveSize;
631  if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
632  // When shortening an atomic memory intrinsic, the newly shortened
633  // length must remain an integer multiple of the element size.
634  const uint32_t ElementSize = AMI->getElementSizeInBytes();
635  if (0 != NewSize % ElementSize)
636  return false;
637  }
638 
639  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
640  << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
641  << "\n KILLER [" << ToRemoveStart << ", "
642  << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
643 
644  Value *DeadWriteLength = DeadIntrinsic->getLength();
645  Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
646  DeadIntrinsic->setLength(TrimmedLength);
647  DeadIntrinsic->setDestAlignment(PrefAlign);
648 
649  if (!IsOverwriteEnd) {
650  Value *OrigDest = DeadIntrinsic->getRawDest();
651  Type *Int8PtrTy =
652  Type::getInt8PtrTy(DeadIntrinsic->getContext(),
653  OrigDest->getType()->getPointerAddressSpace());
654  Value *Dest = OrigDest;
655  if (OrigDest->getType() != Int8PtrTy)
656  Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", DeadI);
657  Value *Indices[1] = {
658  ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
659  GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
660  Type::getInt8Ty(DeadIntrinsic->getContext()), Dest, Indices, "", DeadI);
661  NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
662  if (NewDestGEP->getType() != OrigDest->getType())
663  NewDestGEP = CastInst::CreatePointerCast(NewDestGEP, OrigDest->getType(),
664  "", DeadI);
665  DeadIntrinsic->setDest(NewDestGEP);
666  }
667 
668  // Finally update start and size of dead access.
669  if (!IsOverwriteEnd)
670  DeadStart += ToRemoveSize;
671  DeadSize = NewSize;
672 
673  return true;
674 }
675 
676 static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
677  int64_t &DeadStart, uint64_t &DeadSize) {
678  if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
679  return false;
680 
681  OverlapIntervalsTy::iterator OII = --IntervalMap.end();
682  int64_t KillingStart = OII->second;
683  uint64_t KillingSize = OII->first - KillingStart;
684 
685  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
686 
687  if (KillingStart > DeadStart &&
688  // Note: "KillingStart - DeadStart" is known to be positive due to
689  // preceding check.
690  (uint64_t)(KillingStart - DeadStart) < DeadSize &&
691  // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
692  // be non negative due to preceding checks.
693  KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
694  if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
695  true)) {
696  IntervalMap.erase(OII);
697  return true;
698  }
699  }
700  return false;
701 }
702 
703 static bool tryToShortenBegin(Instruction *DeadI,
704  OverlapIntervalsTy &IntervalMap,
705  int64_t &DeadStart, uint64_t &DeadSize) {
706  if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
707  return false;
708 
709  OverlapIntervalsTy::iterator OII = IntervalMap.begin();
710  int64_t KillingStart = OII->second;
711  uint64_t KillingSize = OII->first - KillingStart;
712 
713  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
714 
715  if (KillingStart <= DeadStart &&
716  // Note: "DeadStart - KillingStart" is known to be non negative due to
717  // preceding check.
718  KillingSize > (uint64_t)(DeadStart - KillingStart)) {
719  // Note: "KillingSize - (uint64_t)(DeadStart - KillingStart)" is known to
720  // be positive due to preceding checks.
721  assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
722  "Should have been handled as OW_Complete");
723  if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
724  false)) {
725  IntervalMap.erase(OII);
726  return true;
727  }
728  }
729  return false;
730 }
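// A sketch of the two shortening helpers above, with made-up sizes and an
// alignment that divides them: if a dead memset of 32 bytes at %p is later
// overwritten on bytes [16, 32), tryToShortenEnd trims it to a 16-byte memset
// at %p; if the killing store instead covers bytes [0, 16), tryToShortenBegin
// keeps the last 16 bytes by advancing the destination to %p + 16 (via the
// GEP emitted in tryToShorten) and halving the length.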
731 
732 static bool removePartiallyOverlappedStores(const DataLayout &DL,
733  InstOverlapIntervalsTy &IOL,
734  const TargetLibraryInfo &TLI) {
735  bool Changed = false;
736  for (auto OI : IOL) {
737  Instruction *DeadI = OI.first;
738  MemoryLocation Loc = getLocForWrite(DeadI, TLI);
739  assert(isRemovable(DeadI) && "Expect only removable instruction");
740 
741  const Value *Ptr = Loc.Ptr->stripPointerCasts();
742  int64_t DeadStart = 0;
743  uint64_t DeadSize = Loc.Size.getValue();
744  GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
745  OverlapIntervalsTy &IntervalMap = OI.second;
746  Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
747  if (IntervalMap.empty())
748  continue;
749  Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
750  }
751  return Changed;
752 }
753 
754 static Constant *
755 tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
756  int64_t KillingOffset, int64_t DeadOffset,
757  const DataLayout &DL, BatchAAResults &AA,
758  DominatorTree *DT) {
759 
760  if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
761  DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
762  KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
763  DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
764  memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
765  // If the store we find is:
766  // a) partially overwritten by the store to 'Loc'
767  // b) the killing store is fully contained in the dead one and
768  // c) they both have a constant value
769  // d) none of the two stores need padding
770  // Merge the two stores, replacing the dead store's value with a
771  // merge of both values.
772  // TODO: Deal with other constant types (vectors, etc), and probably
773  // some mem intrinsics (if needed)
774 
775  APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
776  APInt KillingValue =
777  cast<ConstantInt>(KillingI->getValueOperand())->getValue();
778  unsigned KillingBits = KillingValue.getBitWidth();
779  assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
780  KillingValue = KillingValue.zext(DeadValue.getBitWidth());
781 
782  // Offset of the smaller store inside the larger store
783  unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
784  unsigned LShiftAmount =
785  DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
786  : BitOffsetDiff;
787  APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
788  LShiftAmount + KillingBits);
789  // Clear the bits we'll be replacing, then OR with the smaller
790  // store, shifted appropriately.
791  APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
792  LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
793  << "\n Killing: " << *KillingI
794  << "\n Merged Value: " << Merged << '\n');
795  return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
796  }
797  return nullptr;
798 }
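// Worked example of the merge above (little-endian, illustrative constants):
// DeadI stores the i32 constant 0x11223344 at offset 0 and KillingI stores
// the i8 constant 0xAA at offset 1. BitOffsetDiff = (1 - 0) * 8 = 8,
// LShiftAmount = 8 and Mask covers bits [8, 16), so the merged constant is
// (0x11223344 & ~0x0000FF00) | (0xAA << 8) = 0x1122AA44, which becomes the
// new value of the dead store.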
799 
800 namespace {
801 // Returns true if \p I is an intrinsic that does not read or write memory.
802 bool isNoopIntrinsic(Instruction *I) {
803  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
804  switch (II->getIntrinsicID()) {
805  case Intrinsic::lifetime_start:
806  case Intrinsic::lifetime_end:
807  case Intrinsic::invariant_end:
808  case Intrinsic::launder_invariant_group:
809  case Intrinsic::assume:
810  return true;
811  case Intrinsic::dbg_addr:
812  case Intrinsic::dbg_declare:
813  case Intrinsic::dbg_label:
814  case Intrinsic::dbg_value:
815  llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
816  default:
817  return false;
818  }
819  }
820  return false;
821 }
822 
823 // Check if we can ignore \p D for DSE.
824 bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller,
825  const TargetLibraryInfo &TLI) {
826  Instruction *DI = D->getMemoryInst();
827  // Calls that only access inaccessible memory cannot read or write any memory
828  // locations we consider for elimination.
829  if (auto *CB = dyn_cast<CallBase>(DI))
830  if (CB->onlyAccessesInaccessibleMemory()) {
831  if (isAllocLikeFn(DI, &TLI))
832  return false;
833  return true;
834  }
835  // We can eliminate stores to locations not visible to the caller across
836  // throwing instructions.
837  if (DI->mayThrow() && !DefVisibleToCaller)
838  return true;
839 
840  // We can remove the dead stores, irrespective of the fence and its ordering
841  // (release/acquire/seq_cst). Fences only constrain the ordering of
842  // already visible stores; they do not make a store visible to other
843  // threads. So, skipping over a fence does not change a store from being
844  // dead.
845  if (isa<FenceInst>(DI))
846  return true;
847 
848  // Skip intrinsics that do not really read or modify memory.
849  if (isNoopIntrinsic(DI))
850  return true;
851 
852  return false;
853 }
854 
855 struct DSEState {
856  Function &F;
857  AliasAnalysis &AA;
858 
859  /// The single BatchAA instance that is used to cache AA queries. It will
860  /// not be invalidated over the whole run. This is safe, because:
861  /// 1. Only memory writes are removed, so the alias cache for memory
862  /// locations remains valid.
863  /// 2. No new instructions are added (only instructions removed), so cached
864  /// information for a deleted value cannot be accessed by a re-used new
865  /// value pointer.
866  BatchAAResults BatchAA;
867 
868  MemorySSA &MSSA;
869  DominatorTree &DT;
870  PostDominatorTree &PDT;
871  const TargetLibraryInfo &TLI;
872  const DataLayout &DL;
873  const LoopInfo &LI;
874 
875  // Whether the function contains any irreducible control flow, useful for
876  // being accurately able to detect loops.
877  bool ContainsIrreducibleLoops;
878 
879  // All MemoryDefs that potentially could kill other MemDefs.
880  SmallVector<MemoryDef *, 64> MemDefs;
881  // Any that should be skipped as they are already deleted
882  SmallPtrSet<MemoryAccess *, 4> SkipStores;
883  // Keep track of all of the objects that are invisible to the caller before
884  // the function returns.
885  // SmallPtrSet<const Value *, 16> InvisibleToCallerBeforeRet;
886  DenseMap<const Value *, bool> InvisibleToCallerBeforeRet;
887  // Keep track of all of the objects that are invisible to the caller after
888  // the function returns.
889  DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
890  // Keep track of blocks with throwing instructions not modeled in MemorySSA.
891  SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
892  // Post-order numbers for each basic block. Used to figure out if memory
893  // accesses are executed before another access.
894  DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
895 
896  /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
897  /// basic block.
898  MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
899 
900  DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
901  PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
902  const LoopInfo &LI)
903  : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
904  DL(F.getParent()->getDataLayout()), LI(LI) {}
905 
906  static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
907  DominatorTree &DT, PostDominatorTree &PDT,
908  const TargetLibraryInfo &TLI, const LoopInfo &LI) {
909  DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
910  // Collect blocks with throwing instructions not modeled in MemorySSA and
911  // alloc-like objects.
912  unsigned PO = 0;
913  for (BasicBlock *BB : post_order(&F)) {
914  State.PostOrderNumbers[BB] = PO++;
915  for (Instruction &I : *BB) {
916  MemoryAccess *MA = MSSA.getMemoryAccess(&I);
917  if (I.mayThrow() && !MA)
918  State.ThrowingBlocks.insert(I.getParent());
919 
920  auto *MD = dyn_cast_or_null<MemoryDef>(MA);
921  if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit &&
922  (State.getLocForWriteEx(&I) || State.isMemTerminatorInst(&I)))
923  State.MemDefs.push_back(MD);
924  }
925  }
926 
927  // Treat byval or inalloca arguments the same as Allocas: stores to them are
928  // dead at the end of the function.
929  for (Argument &AI : F.args())
930  if (AI.hasPassPointeeByValueCopyAttr()) {
931  // For byval, the caller doesn't know the address of the allocation.
932  if (AI.hasByValAttr())
933  State.InvisibleToCallerBeforeRet.insert({&AI, true});
934  State.InvisibleToCallerAfterRet.insert({&AI, true});
935  }
936 
937  // Collect whether there is any irreducible control flow in the function.
938  State.ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
939 
940  return State;
941  }
942 
943  /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
944  /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
945  /// location (by \p DeadI instruction).
946  /// Return OW_MaybePartial if \p KillingI does not completely overwrite
947  /// \p DeadI, but they both write to the same underlying object. In that
948  /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
949  /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
950  OverwriteResult isOverwrite(const Instruction *KillingI,
951  const Instruction *DeadI,
952  const MemoryLocation &KillingLoc,
953  const MemoryLocation &DeadLoc,
954  int64_t &KillingOff, int64_t &DeadOff) {
955  // AliasAnalysis does not always account for loops. Limit overwrite checks
956  // to dependencies for which we can guarantee they are independent of any
957  // loops they are in.
958  if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
959  return OW_Unknown;
960 
961  // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
962  // get imprecise values here, though (except for unknown sizes).
963  if (!KillingLoc.Size.isPrecise() || !DeadLoc.Size.isPrecise()) {
964  // In case no constant size is known, try to compare the IR values for the
965  // number of bytes written and check if they match.
966  const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
967  const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
968  if (KillingMemI && DeadMemI) {
969  const Value *KillingV = KillingMemI->getLength();
970  const Value *DeadV = DeadMemI->getLength();
971  if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
972  return OW_Complete;
973  }
974 
975  // Masked stores have imprecise locations, but we can reason about them
976  // to some extent.
977  return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
978  }
979 
980  const uint64_t KillingSize = KillingLoc.Size.getValue();
981  const uint64_t DeadSize = DeadLoc.Size.getValue();
982 
983  // Query the alias information
984  AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
985 
986  // If the start pointers are the same, we just have to compare sizes to see if
987  // the killing store was larger than the dead store.
988  if (AAR == AliasResult::MustAlias) {
989  // Make sure that the KillingSize size is >= the DeadSize size.
990  if (KillingSize >= DeadSize)
991  return OW_Complete;
992  }
993 
994  // If we hit a partial alias we may have a full overwrite
995  if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
996  int32_t Off = AAR.getOffset();
997  if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
998  return OW_Complete;
999  }
1000 
1001  // Check to see if the killing store is to the entire object (either a
1002  // global, an alloca, or a byval/inalloca argument). If so, then it clearly
1003  // overwrites any other store to the same object.
1004  const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
1005  const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
1006  const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
1007  const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
1008 
1009  // If we can't resolve the same pointers to the same object, then we can't
1010  // analyze them at all.
1011  if (DeadUndObj != KillingUndObj)
1012  return OW_Unknown;
1013 
1014  // If the KillingI store is to a recognizable object, get its size.
1015  uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
1016  if (KillingUndObjSize != MemoryLocation::UnknownSize)
1017  if (KillingUndObjSize == KillingSize && KillingUndObjSize >= DeadSize)
1018  return OW_Complete;
1019 
1020  // Okay, we have stores to two completely different pointers. Try to
1021  // decompose the pointer into a "base + constant_offset" form. If the base
1022  // pointers are equal, then we can reason about the two stores.
1023  DeadOff = 0;
1024  KillingOff = 0;
1025  const Value *DeadBasePtr =
1026  GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
1027  const Value *KillingBasePtr =
1028  GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
1029 
1030  // If the base pointers still differ, we have two completely different
1031  // stores.
1032  if (DeadBasePtr != KillingBasePtr)
1033  return OW_Unknown;
1034 
1035  // The killing access completely overlaps the dead store if and only if
1036  // both start and end of the dead one is "inside" the killing one:
1037  // |<->|--dead--|<->|
1038  // |-----killing------|
1039  // Accesses may overlap if and only if start of one of them is "inside"
1040  // another one:
1041  // |<->|--dead--|<-------->|
1042  // |-------killing--------|
1043  // OR
1044  // |-------dead-------|
1045  // |<->|---killing---|<----->|
1046  //
1047  // We have to be careful here as *Off is signed while *.Size is unsigned.
1048 
1049  // Check if the dead access starts "not before" the killing one.
1050  if (DeadOff >= KillingOff) {
1051  // If the dead access ends "not after" the killing access then the
1052  // dead one is completely overwritten by the killing one.
1053  if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
1054  return OW_Complete;
1055  // If start of the dead access is "before" end of the killing access
1056  // then accesses overlap.
1057  else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
1058  return OW_MaybePartial;
1059  }
1060  // If start of the killing access is "before" end of the dead access then
1061  // accesses overlap.
1062  else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
1063  return OW_MaybePartial;
1064  }
1065 
1066  // Can reach here only if accesses are known not to overlap. There is no
1067  // dedicated code to indicate no overlap so signal "unknown".
1068  return OW_Unknown;
1069  }
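// A small worked example for the offset reasoning above (offsets relative to
// the shared base pointer, purely illustrative): DeadOff = 4 / DeadSize = 4
// against KillingOff = 0 / KillingSize = 8 satisfies (4 - 0) + 4 <= 8, so the
// dead access lies entirely inside the killing one and OW_Complete is
// returned; with DeadSize = 8 instead, only (4 - 0) < 8 holds and the result
// is OW_MaybePartial, to be refined by isPartialOverwrite.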
1070 
1071  bool isInvisibleToCallerAfterRet(const Value *V) {
1072  if (isa<AllocaInst>(V))
1073  return true;
1074  auto I = InvisibleToCallerAfterRet.insert({V, false});
1075  if (I.second) {
1076  if (!isInvisibleToCallerBeforeRet(V)) {
1077  I.first->second = false;
1078  } else {
1079  auto *Inst = dyn_cast<Instruction>(V);
1080  if (Inst && isAllocLikeFn(Inst, &TLI))
1081  I.first->second = !PointerMayBeCaptured(V, true, false);
1082  }
1083  }
1084  return I.first->second;
1085  }
1086 
1087  bool isInvisibleToCallerBeforeRet(const Value *V) {
1088  if (isa<AllocaInst>(V))
1089  return true;
1090  auto I = InvisibleToCallerBeforeRet.insert({V, false});
1091  if (I.second) {
1092  auto *Inst = dyn_cast<Instruction>(V);
1093  if (Inst && isAllocLikeFn(Inst, &TLI))
1094  // NOTE: This could be made more precise by PointerMayBeCapturedBefore
1095  // with the killing MemoryDef. But we refrain from doing so for now to
1096  // limit compile-time and this does not cause any changes to the number
1097  // of stores removed on a large test set in practice.
1098  I.first->second = !PointerMayBeCaptured(V, false, true);
1099  }
1100  return I.first->second;
1101  }
1102 
1103  Optional<MemoryLocation> getLocForWriteEx(Instruction *I) const {
1104  if (!I->mayWriteToMemory())
1105  return None;
1106 
1107  if (auto *MTI = dyn_cast<AnyMemIntrinsic>(I))
1108  return {MemoryLocation::getForDest(MTI)};
1109 
1110  if (auto *CB = dyn_cast<CallBase>(I)) {
1111  // If the function may write to memory we do not know about, bail out.
1112  if (!CB->onlyAccessesArgMemory() &&
1113  !CB->onlyAccessesInaccessibleMemOrArgMem())
1114  return None;
1115 
1116  LibFunc LF;
1117  if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) {
1118  switch (LF) {
1119  case LibFunc_strcpy:
1120  case LibFunc_strncpy:
1121  case LibFunc_strcat:
1122  case LibFunc_strncat:
1123  return {MemoryLocation::getAfter(CB->getArgOperand(0))};
1124  default:
1125  break;
1126  }
1127  }
1128  switch (CB->getIntrinsicID()) {
1129  case Intrinsic::init_trampoline:
1130  return {MemoryLocation::getAfter(CB->getArgOperand(0))};
1131  case Intrinsic::masked_store:
1132  return {MemoryLocation::getForArgument(CB, 1, TLI)};
1133  default:
1134  break;
1135  }
1136  return None;
1137  }
1138 
1139  return MemoryLocation::getOrNone(I);
1140  }
1141 
1142  /// Returns true if \p UseInst completely overwrites \p DefLoc
1143  /// (stored by \p DefInst).
1144  bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1145  Instruction *UseInst) {
1146  // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1147  // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1148  // MemoryDef.
1149  if (!UseInst->mayWriteToMemory())
1150  return false;
1151 
1152  if (auto *CB = dyn_cast<CallBase>(UseInst))
1153  if (CB->onlyAccessesInaccessibleMemory())
1154  return false;
1155 
1156  int64_t InstWriteOffset, DepWriteOffset;
1157  if (auto CC = getLocForWriteEx(UseInst))
1158  return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1159  DepWriteOffset) == OW_Complete;
1160  return false;
1161  }
1162 
1163  /// Returns true if \p Def is not read before returning from the function.
1164  bool isWriteAtEndOfFunction(MemoryDef *Def) {
1165  LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1166  << *Def->getMemoryInst()
1167  << ") is at the end the function \n");
1168 
1169  auto MaybeLoc = getLocForWriteEx(Def->getMemoryInst());
1170  if (!MaybeLoc) {
1171  LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
1172  return false;
1173  }
1174 
1175  SmallVector<MemoryAccess *, 4> WorkList;
1176  SmallPtrSet<MemoryAccess *, 8> Visited;
1177  auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
1178  if (!Visited.insert(Acc).second)
1179  return;
1180  for (Use &U : Acc->uses())
1181  WorkList.push_back(cast<MemoryAccess>(U.getUser()));
1182  };
1183  PushMemUses(Def);
1184  for (unsigned I = 0; I < WorkList.size(); I++) {
1185  if (WorkList.size() >= MemorySSAScanLimit) {
1186  LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1187  return false;
1188  }
1189 
1190  MemoryAccess *UseAccess = WorkList[I];
1191  // Simply adding the users of MemoryPhi to the worklist is not enough,
1192  // because we might miss read clobbers in different iterations of a loop,
1193  // for example.
1194  // TODO: Add support for phi translation to handle the loop case.
1195  if (isa<MemoryPhi>(UseAccess))
1196  return false;
1197 
1198  // TODO: Checking for aliasing is expensive. Consider reducing the amount
1199  // of times this is called and/or caching it.
1200  Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1201  if (isReadClobber(*MaybeLoc, UseInst)) {
1202  LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1203  return false;
1204  }
1205 
1206  if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1207  PushMemUses(UseDef);
1208  }
1209  return true;
1210  }
1211 
1212  /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1213  /// pair with the MemoryLocation terminated by \p I and a boolean flag
1214  /// indicating whether \p I is a free-like call.
1215  Optional<std::pair<MemoryLocation, bool>>
1216  getLocForTerminator(Instruction *I) const {
1217  uint64_t Len;
1218  Value *Ptr;
1219  if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
1220  m_Value(Ptr))))
1221  return {std::make_pair(MemoryLocation(Ptr, Len), false)};
1222 
1223  if (auto *CB = dyn_cast<CallBase>(I)) {
1224  if (isFreeCall(I, &TLI))
1225  return {std::make_pair(MemoryLocation::getAfter(CB->getArgOperand(0)),
1226  true)};
1227  }
1228 
1229  return None;
1230  }
1231 
1232  /// Returns true if \p I is a memory terminator instruction like
1233  /// llvm.lifetime.end or free.
1234  bool isMemTerminatorInst(Instruction *I) const {
1235  IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
1236  return (II && II->getIntrinsicID() == Intrinsic::lifetime_end) ||
1237  isFreeCall(I, &TLI);
1238  }
1239 
1240  /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1241  /// instruction \p AccessI.
1242  bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1243  Instruction *MaybeTerm) {
1244  Optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1245  getLocForTerminator(MaybeTerm);
1246 
1247  if (!MaybeTermLoc)
1248  return false;
1249 
1250  // If the terminator is a free-like call, all accesses to the underlying
1251  // object can be considered terminated.
1252  if (getUnderlyingObject(Loc.Ptr) !=
1253  getUnderlyingObject(MaybeTermLoc->first.Ptr))
1254  return false;
1255 
1256  auto TermLoc = MaybeTermLoc->first;
1257  if (MaybeTermLoc->second) {
1258  const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1259  return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1260  }
1261  int64_t InstWriteOffset = 0;
1262  int64_t DepWriteOffset = 0;
1263  return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1264  DepWriteOffset) == OW_Complete;
1265  }
1266 
1267  // Returns true if \p Use may read from \p DefLoc.
1268  bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1269  if (isNoopIntrinsic(UseInst))
1270  return false;
1271 
1272  // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1273  // treated as read clobber.
1274  if (auto SI = dyn_cast<StoreInst>(UseInst))
1275  return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1276 
1277  if (!UseInst->mayReadFromMemory())
1278  return false;
1279 
1280  if (auto *CB = dyn_cast<CallBase>(UseInst))
1281  if (CB->onlyAccessesInaccessibleMemory())
1282  return false;
1283 
1284  // NOTE: For calls, the number of stores removed could be slightly improved
1285  // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that showed to
1286  // be expensive compared to the benefits in practice. For now, avoid more
1287  // expensive analysis to limit compile-time.
1288  return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1289  }
1290 
1291  /// Returns true if a dependency between \p Current and \p KillingDef is
1292  /// guaranteed to be loop invariant for the loops that they are in. Either
1293  /// because they are known to be in the same block, in the same loop level or
1294  /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1295  /// during execution of the containing function.
1296  bool isGuaranteedLoopIndependent(const Instruction *Current,
1297  const Instruction *KillingDef,
1298  const MemoryLocation &CurrentLoc) {
1299  // If the dependency is within the same block or loop level (being careful
1300  // of irreducible loops), we know that AA will return a valid result for the
1301  // memory dependency. (Both at the function level, outside of any loop,
1302  // would also be valid but we currently disable that to limit compile time).
1303  if (Current->getParent() == KillingDef->getParent())
1304  return true;
1305  const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
1306  if (!ContainsIrreducibleLoops && CurrentLI &&
1307  CurrentLI == LI.getLoopFor(KillingDef->getParent()))
1308  return true;
1309  // Otherwise check the memory location is invariant to any loops.
1310  return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1311  }
1312 
1313  /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1314  /// loop. In particular, this guarantees that it only references a single
1315  /// MemoryLocation during execution of the containing function.
1316  bool isGuaranteedLoopInvariant(const Value *Ptr) {
1317  auto IsGuaranteedLoopInvariantBase = [this](const Value *Ptr) {
1318  Ptr = Ptr->stripPointerCasts();
1319  if (auto *I = dyn_cast<Instruction>(Ptr)) {
1320  if (isa<AllocaInst>(Ptr))
1321  return true;
1322 
1323  if (isAllocLikeFn(I, &TLI))
1324  return true;
1325 
1326  return false;
1327  }
1328  return true;
1329  };
1330 
1331  Ptr = Ptr->stripPointerCasts();
1332  if (auto *I = dyn_cast<Instruction>(Ptr)) {
1333  if (I->getParent()->isEntryBlock())
1334  return true;
1335  }
1336  if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
1337  return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) &&
1338  GEP->hasAllConstantIndices();
1339  }
1340  return IsGuaranteedLoopInvariantBase(Ptr);
1341  }
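// For example (illustrative IR), a pointer such as
//   %q = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 2
// with %a an alloca is guaranteed loop invariant (alloc-like base, all
// constant indices), whereas a GEP indexed by a loop induction variable may
// name a different MemoryLocation on every iteration and is rejected.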
1342 
1343  // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1344  // with no read access between them or on any other path to a function exit
1345  // block if \p KillingLoc is not accessible after the function returns. If
1346  // there is no such MemoryDef, return None. The returned value may not
1347  // (completely) overwrite \p KillingLoc. Currently we bail out when we
1348  // encounter an aliasing MemoryUse (read).
1349  Optional<MemoryAccess *>
1350  getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1351  const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1352  unsigned &ScanLimit, unsigned &WalkerStepLimit,
1353  bool IsMemTerm, unsigned &PartialLimit) {
1354  if (ScanLimit == 0 || WalkerStepLimit == 0) {
1355  LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1356  return None;
1357  }
1358 
1359  MemoryAccess *Current = StartAccess;
1360  Instruction *KillingI = KillingDef->getMemoryInst();
1361  LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1362 
1363  // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1364  Optional<MemoryLocation> CurrentLoc;
1365  for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1366  LLVM_DEBUG({
1367  dbgs() << " visiting " << *Current;
1368  if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1369  dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1370  << ")";
1371  dbgs() << "\n";
1372  });
1373 
1374  // Reached TOP.
1375  if (MSSA.isLiveOnEntryDef(Current)) {
1376  LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1377  return None;
1378  }
1379 
1380  // Cost of a step. Accesses in the same block are more likely to be valid
1381  // candidates for elimination, hence consider them cheaper.
1382  unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1383  ? MemorySSASameBBStepCost
1384  : MemorySSAOtherBBStepCost;
1385  if (WalkerStepLimit <= StepCost) {
1386  LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1387  return None;
1388  }
1389  WalkerStepLimit -= StepCost;
1390 
1391  // Return for MemoryPhis. They cannot be eliminated directly and the
1392  // caller is responsible for traversing them.
1393  if (isa<MemoryPhi>(Current)) {
1394  LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1395  return Current;
1396  }
1397 
1398  // Below, check if CurrentDef is a valid candidate to be eliminated by
1399  // KillingDef. If it is not, check the next candidate.
1400  MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1401  Instruction *CurrentI = CurrentDef->getMemoryInst();
1402 
1403  if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(KillingUndObj),
1404  TLI))
1405  continue;
1406 
1407  // Before we try to remove anything, check for any extra throwing
1408  // instructions that block us from DSEing
1409  if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1410  LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1411  return None;
1412  }
1413 
1414  // Check for anything that looks like it will be a barrier to further
1415  // removal
1416  if (isDSEBarrier(KillingUndObj, CurrentI)) {
1417  LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1418  return None;
1419  }
1420 
1421  // If Current is known to be on a path that reads DefLoc or is a read
1422  // clobber, bail out, as the path is not profitable. We skip this check
1423  // for intrinsic calls, because the code knows how to handle memcpy
1424  // intrinsics.
1425  if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1426  return None;
1427 
1428  // Quick check if there are direct uses that are read-clobbers.
1429  if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1430  if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1431  return !MSSA.dominates(StartAccess, UseOrDef) &&
1432  isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1433  return false;
1434  })) {
1435  LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1436  return None;
1437  }
1438 
1439  // If Current cannot be analyzed or is not removable, check the next
1440  // candidate.
1441  if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI))
1442  continue;
1443 
1444  // If Current does not have an analyzable write location, skip it
1445  CurrentLoc = getLocForWriteEx(CurrentI);
1446  if (!CurrentLoc)
1447  continue;
1448 
1449  // AliasAnalysis does not account for loops. Limit elimination to
1450  // candidates for which we can guarantee they always store to the same
1451  // memory location and are not located in different loops.
1452  if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1453  LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1454  WalkerStepLimit -= 1;
1455  continue;
1456  }
1457 
1458  if (IsMemTerm) {
1459  // If the killing def is a memory terminator (e.g. lifetime.end), check
1460  // the next candidate if the current Current does not write the same
1461  // underlying object as the terminator.
1462  if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI))
1463  continue;
1464  } else {
1465  int64_t KillingOffset = 0;
1466  int64_t DeadOffset = 0;
1467  auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1468  KillingOffset, DeadOffset);
1469  // If Current does not write to the same object as KillingDef, check
1470  // the next candidate.
1471  if (OR == OW_Unknown)
1472  continue;
1473  else if (OR == OW_MaybePartial) {
1474  // If KillingDef only partially overwrites Current, check the next
1475  // candidate if the partial step limit is exceeded. This aggressively
1476  // limits the number of candidates for partial store elimination,
1477  // which are less likely to be removable in the end.
1478  if (PartialLimit <= 1) {
1479  WalkerStepLimit -= 1;
1480  continue;
1481  }
1482  PartialLimit -= 1;
1483  }
1484  }
1485  break;
1486  };
1487 
1488  // Accesses to objects accessible after the function returns can only be
1489  // eliminated if the access is dead along all paths to the exit. Collect
1490  // the blocks with killing (=completely overwriting) MemoryDefs and check if
1491  // they cover all paths from MaybeDeadAccess to any function exit.
1492  SmallPtrSet<Instruction *, 16> KillingDefs;
1493  KillingDefs.insert(KillingDef->getMemoryInst());
1494  MemoryAccess *MaybeDeadAccess = Current;
1495  MemoryLocation MaybeDeadLoc = *CurrentLoc;
1496  Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1497  LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1498  << *MaybeDeadI << ")\n");
1499 
1500  SmallSetVector<MemoryAccess *, 32> WorkList;
1501  auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
1502  for (Use &U : Acc->uses())
1503  WorkList.insert(cast<MemoryAccess>(U.getUser()));
1504  };
1505  PushMemUses(MaybeDeadAccess);
1506 
1507  // Check if DeadDef may be read.
1508  for (unsigned I = 0; I < WorkList.size(); I++) {
1509  MemoryAccess *UseAccess = WorkList[I];
1510 
1511  LLVM_DEBUG(dbgs() << " " << *UseAccess);
1512  // Bail out if the number of accesses to check exceeds the scan limit.
1513  if (ScanLimit < (WorkList.size() - I)) {
1514  LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1515  return None;
1516  }
1517  --ScanLimit;
1518  NumDomMemDefChecks++;
1519 
1520  if (isa<MemoryPhi>(UseAccess)) {
1521  if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1522  return DT.properlyDominates(KI->getParent(),
1523  UseAccess->getBlock());
1524  })) {
1525  LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1526  continue;
1527  }
1528  LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1529  PushMemUses(UseAccess);
1530  continue;
1531  }
1532 
1533  Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1534  LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1535 
1536  if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1537  return DT.dominates(KI, UseInst);
1538  })) {
1539  LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1540  continue;
1541  }
1542 
1543  // A memory terminator kills all preceding MemoryDefs and all succeeding
1544  // MemoryAccesses. We do not have to check its users.
1545  if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1546  LLVM_DEBUG(
1547  dbgs()
1548  << " ... skipping, memterminator invalidates following accesses\n");
1549  continue;
1550  }
1551 
1552  if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1553  LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1554  PushMemUses(UseAccess);
1555  continue;
1556  }
1557 
1558  if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj)) {
1559  LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1560  return None;
1561  }
1562 
1563  // Uses which may read the original MemoryDef mean we cannot eliminate the
1564  // original MemoryDef. Stop the walk.
1565  if (isReadClobber(MaybeDeadLoc, UseInst)) {
1566  LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1567  return None;
1568  }
1569 
1570  // If this worklist walks back to the original memory access (and the
1571  // pointer is not guaranteed loop invariant) then we cannot assume that a
1572  // store kills itself.
1573  if (MaybeDeadAccess == UseAccess &&
1574  !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1575  LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1576  return None;
1577  }
1578  // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1579  // if it reads the memory location.
1580  // TODO: It would probably be better to check for self-reads before
1581  // calling the function.
1582  if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1583  LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1584  continue;
1585  }
1586 
1587  // Check all uses for MemoryDefs, except for defs completely overwriting
1588  // the original location. Otherwise we have to check uses of *all*
1589  // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
1590  // miss cases like the following
1591  // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1592  // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1593  // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1594  // (The Use points to the *first* Def it may alias)
1595  // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1596  // stores [0,1]
1597  if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1598  if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1599  BasicBlock *MaybeKillingBlock = UseInst->getParent();
1600  if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1601  PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1602  if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1603  LLVM_DEBUG(dbgs()
1604  << " ... found killing def " << *UseInst << "\n");
1605  KillingDefs.insert(UseInst);
1606  }
1607  } else {
1608  LLVM_DEBUG(dbgs()
1609  << " ... found preceding def " << *UseInst << "\n");
1610  return None;
1611  }
1612  } else
1613  PushMemUses(UseDef);
1614  }
1615  }
1616 
1617  // For accesses to locations visible after the function returns, make sure
1618  // that the location is dead (=overwritten) along all paths from
1619  // MaybeDeadAccess to the exit.
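  // For example, if blocks B1 and B2 (hypothetical) each contain a complete
  // overwrite of MaybeDeadLoc and every path from MaybeDeadAccess's block to a
  // function exit passes through B1 or B2, the dead store cannot be observed
  // after the function returns; if a single exit path bypasses all killing
  // blocks, the store has to be kept.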
1620  if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1621  SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1622  for (Instruction *KD : KillingDefs)
1623  KillingBlocks.insert(KD->getParent());
1624  assert(!KillingBlocks.empty() &&
1625  "Expected at least a single killing block");
1626 
1627  // Find the common post-dominator of all killing blocks.
1628  BasicBlock *CommonPred = *KillingBlocks.begin();
1629  for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1630  if (!CommonPred)
1631  break;
1632  CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1633  }
1634 
1635  // If CommonPred is in the set of killing blocks, just check if it
1636  // post-dominates MaybeDeadAccess.
1637  if (KillingBlocks.count(CommonPred)) {
1638  if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock()))
1639  return {MaybeDeadAccess};
1640  return None;
1641  }
1642 
1643  // If the common post-dominator does not post-dominate MaybeDeadAccess,
1644  // there is a path from MaybeDeadAccess to an exit not going through a
1645  // killing block.
1646  if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1647  SetVector<BasicBlock *> WorkList;
1648 
1649  // If CommonPred is null, there are multiple exits from the function.
1650  // They all have to be added to the worklist.
1651  if (CommonPred)
1652  WorkList.insert(CommonPred);
1653  else
1654  for (BasicBlock *R : PDT.roots())
1655  WorkList.insert(R);
1656 
1657  NumCFGTries++;
1658  // Check if all paths starting from an exit node go through one of the
1659  // killing blocks before reaching MaybeDeadAccess.
1660  for (unsigned I = 0; I < WorkList.size(); I++) {
1661  NumCFGChecks++;
1662  BasicBlock *Current = WorkList[I];
1663  if (KillingBlocks.count(Current))
1664  continue;
1665  if (Current == MaybeDeadAccess->getBlock())
1666  return None;
1667 
1668  // MaybeDeadAccess is reachable from the entry, so we don't have to
1669  // explore unreachable blocks further.
1670  if (!DT.isReachableFromEntry(Current))
1671  continue;
1672 
1673  for (BasicBlock *Pred : predecessors(Current))
1674  WorkList.insert(Pred);
1675 
1676  if (WorkList.size() >= MemorySSAPathCheckLimit)
1677  return None;
1678  }
1679  NumCFGSuccess++;
1680  return {MaybeDeadAccess};
1681  }
1682  return None;
1683  }
1684 
1685  // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1686  // potentially dead.
1687  return {MaybeDeadAccess};
1688  }
1689 
1690  // Delete dead memory defs
1691  void deleteDeadInstruction(Instruction *SI) {
1692  MemorySSAUpdater Updater(&MSSA);
1693  SmallVector<Instruction *, 32> NowDeadInsts;
1694  NowDeadInsts.push_back(SI);
1695  --NumFastOther;
1696 
1697  while (!NowDeadInsts.empty()) {
1698  Instruction *DeadInst = NowDeadInsts.pop_back_val();
1699  ++NumFastOther;
1700 
1701  // Try to preserve debug information attached to the dead instruction.
1702  salvageDebugInfo(*DeadInst);
1703  salvageKnowledge(DeadInst);
1704 
1705  // Remove the Instruction from MSSA.
1706  if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
1707  if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
1708  SkipStores.insert(MD);
1709  }
1710  Updater.removeMemoryAccess(MA);
1711  }
1712 
1713  auto I = IOLs.find(DeadInst->getParent());
1714  if (I != IOLs.end())
1715  I->second.erase(DeadInst);
1716  // Remove its operands
1717  for (Use &O : DeadInst->operands())
1718  if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1719  O = nullptr;
1720  if (isInstructionTriviallyDead(OpI, &TLI))
1721  NowDeadInsts.push_back(OpI);
1722  }
1723 
1724  DeadInst->eraseFromParent();
1725  }
1726  }
1727 
1728  // Check for any extra throws between \p KillingI and \p DeadI that block
1729  // DSE. This only checks extra maythrows (those that aren't MemoryDef's).
1730  // MemoryDefs that may throw are handled during the walk from one def to the
1731  // next.
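  // E.g. a call that may unwind between the dead and the killing store makes
  // the old value of a caller-visible object observable on the unwind path,
  // so the earlier store cannot be removed in that case.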
1732  bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1733  const Value *KillingUndObj) {
1734  // First see if we can ignore it by using the fact that the killing store's
1735  // underlying object is an alloca-like object that is not visible to the
1736  // caller during execution of the function.
1737  if (KillingUndObj && isInvisibleToCallerBeforeRet(KillingUndObj))
1738  return false;
1739 
1740  if (KillingI->getParent() == DeadI->getParent())
1741  return ThrowingBlocks.count(KillingI->getParent());
1742  return !ThrowingBlocks.empty();
1743  }
1744 
1745  // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1746  // instructions act as barriers:
1747  // * A memory instruction that may throw and \p KillingI accesses a non-stack
1748  // object.
1749  // * Atomic stores stronger than monotonic.
1750  bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
1751  // If DeadI may throw it acts as a barrier, unless the killing store is to
1752  // an alloca-like object that does not escape.
1753  if (DeadI->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj))
1754  return true;
1755 
1756  // If DeadI is an atomic load/store stronger than monotonic, do not try to
1757  // eliminate/reorder it.
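  // E.g. 'store atomic i32 0, ptr %p seq_cst' is treated as a barrier here,
  // while a monotonic atomic store (or a plain store) is not.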
1758  if (DeadI->isAtomic()) {
1759  if (auto *LI = dyn_cast<LoadInst>(DeadI))
1760  return isStrongerThanMonotonic(LI->getOrdering());
1761  if (auto *SI = dyn_cast<StoreInst>(DeadI))
1762  return isStrongerThanMonotonic(SI->getOrdering());
1763  if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
1764  return isStrongerThanMonotonic(ARMW->getOrdering());
1765  if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
1766  return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
1767  isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
1768  llvm_unreachable("other instructions should be skipped in MemorySSA");
1769  }
1770  return false;
1771  }
1772 
1773  /// Eliminate writes to objects that are not visible in the caller and are not
1774  /// accessed before returning from the function.
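  /// For example, a store to a non-escaping alloca-like object that is never
  /// read again before 'ret' can be deleted even though no later store
  /// overwrites it.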
1775  bool eliminateDeadWritesAtEndOfFunction() {
1776  bool MadeChange = false;
1777  LLVM_DEBUG(
1778  dbgs()
1779  << "Trying to eliminate MemoryDefs at the end of the function\n");
1780  for (int I = MemDefs.size() - 1; I >= 0; I--) {
1781  MemoryDef *Def = MemDefs[I];
1782  if (SkipStores.contains(Def) || !isRemovable(Def->getMemoryInst()))
1783  continue;
1784 
1785  Instruction *DefI = Def->getMemoryInst();
1786  auto DefLoc = getLocForWriteEx(DefI);
1787  if (!DefLoc)
1788  continue;
1789 
1790  // NOTE: Currently eliminating writes at the end of a function is limited
1791  // to MemoryDefs with a single underlying object, to save compile-time. In
1792  // practice it appears the case with multiple underlying objects is very
1793  // uncommon. If it turns out to be important, we can use
1794  // getUnderlyingObjects here instead.
1795  const Value *UO = getUnderlyingObject(DefLoc->Ptr);
1796  if (!UO || !isInvisibleToCallerAfterRet(UO))
1797  continue;
1798 
1799  if (isWriteAtEndOfFunction(Def)) {
1800  // See through pointer-to-pointer bitcasts
1801  LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
1802  "of the function\n");
1803  deleteDeadInstruction(DefI);
1804  ++NumFastStores;
1805  MadeChange = true;
1806  }
1807  }
1808  return MadeChange;
1809  }
1810 
1811  /// \returns true if \p Def is a no-op store, either because it
1812  /// directly stores back a loaded value or stores zero to a calloced object.
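  /// A minimal sketch of the first case (hypothetical IR):
  ///   %v = load i32, ptr %p
  ///   store i32 %v, ptr %p   ; stores back the value just loaded
  /// is a no-op as long as MemorySSA shows no intervening write to %p between
  /// the load's defining access and this store.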
1813  bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
1814  StoreInst *Store = dyn_cast<StoreInst>(Def->getMemoryInst());
1815  MemSetInst *MemSet = dyn_cast<MemSetInst>(Def->getMemoryInst());
1816  Constant *StoredConstant = nullptr;
1817  if (Store)
1818  StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
1819  if (MemSet)
1820  StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1821 
1822  if (StoredConstant && StoredConstant->isNullValue()) {
1823  auto *DefUOInst = dyn_cast<Instruction>(DefUO);
1824  if (DefUOInst) {
1825  if (isCallocLikeFn(DefUOInst, &TLI)) {
1826  auto *UnderlyingDef =
1827  cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
1828  // If UnderlyingDef is the clobbering access of Def, no instructions
1829  // between them can modify the memory location.
1830  auto *ClobberDef =
1831  MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
1832  return UnderlyingDef == ClobberDef;
1833  }
1834 
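  // The MemSet case below recognizes roughly this pattern (hypothetical IR):
  //   %p = call ptr @malloc(i64 %n)
  //   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
  // and rewrites the malloc into a calloc of the same size, after which the
  // zero-filling memset is a removable no-op store.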
1835  if (MemSet) {
1836  if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
1837  F.hasFnAttribute(Attribute::SanitizeAddress) ||
1838  F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
1839  F.getName() == "calloc")
1840  return false;
1841  auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUOInst));
1842  if (!Malloc)
1843  return false;
1844  auto *InnerCallee = Malloc->getCalledFunction();
1845  if (!InnerCallee)
1846  return false;
1847  LibFunc Func;
1848  if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
1849  Func != LibFunc_malloc)
1850  return false;
1851  if (Malloc->getOperand(0) == MemSet->getLength()) {
1852  if (DT.dominates(Malloc, MemSet) && PDT.dominates(MemSet, Malloc) &&
1853  memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT)) {
1854  IRBuilder<> IRB(Malloc);
1855  const auto &DL = Malloc->getModule()->getDataLayout();
1856  if (auto *Calloc =
1857  emitCalloc(ConstantInt::get(IRB.getIntPtrTy(DL), 1),
1858  Malloc->getArgOperand(0), IRB, TLI)) {
1859  MemorySSAUpdater Updater(&MSSA);
1860  auto *LastDef = cast<MemoryDef>(
1861  Updater.getMemorySSA()->getMemoryAccess(Malloc));
1862  auto *NewAccess = Updater.createMemoryAccessAfter(
1863  cast<Instruction>(Calloc), LastDef, LastDef);
1864  auto *NewAccessMD = cast<MemoryDef>(NewAccess);
1865  Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
1866  Updater.removeMemoryAccess(Malloc);
1867  Malloc->replaceAllUsesWith(Calloc);
1868  Malloc->eraseFromParent();
1869  return true;
1870  }
1871  return false;
1872  }
1873  }
1874  }
1875  }
1876  }
1877 
1878  if (!Store)
1879  return false;
1880 
1881  if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
1882  if (LoadI->getPointerOperand() == Store->getOperand(1)) {
1883  // Get the defining access for the load.
1884  auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
1885  // Fast path: the defining accesses are the same.
1886  if (LoadAccess == Def->getDefiningAccess())
1887  return true;
1888 
1889  // Look through phi accesses. Recursively scan all phi accesses by
1890  // adding them to a worklist. Bail when we run into a memory def that
1891  // does not match LoadAccess.
1892  SetVector<MemoryAccess *> ToCheck;
1893  MemoryAccess *Current =
1894  MSSA.getWalker()->getClobberingMemoryAccess(Def);
1895  // We don't want to bail when we run into the store memory def. But,
1896  // the phi access may point to it. So, pretend like we've already
1897  // checked it.
1898  ToCheck.insert(Def);
1899  ToCheck.insert(Current);
1900  // Start at current (1) to simulate already having checked Def.
1901  for (unsigned I = 1; I < ToCheck.size(); ++I) {
1902  Current = ToCheck[I];
1903  if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
1904  // Check all the operands.
1905  for (auto &Use : PhiAccess->incoming_values())
1906  ToCheck.insert(cast<MemoryAccess>(&Use));
1907  continue;
1908  }
1909 
1910  // If we found a memory def, bail. This happens when we have an
1911  // unrelated write in between an otherwise noop store.
1912  assert(isa<MemoryDef>(Current) &&
1913  "Only MemoryDefs should reach here.");
1914  // TODO: Skip no alias MemoryDefs that have no aliasing reads.
1915  // We are searching for the definition of the store's destination.
1916  // So, if that is the same definition as the load, then this is a
1917  // noop. Otherwise, fail.
1918  if (LoadAccess != Current)
1919  return false;
1920  }
1921  return true;
1922  }
1923  }
1924 
1925  return false;
1926  }
1927 };
1928 
1929 static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
1930  DominatorTree &DT, PostDominatorTree &PDT,
1931  const TargetLibraryInfo &TLI,
1932  const LoopInfo &LI) {
1933  bool MadeChange = false;
1934 
1935  DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI, LI);
1936  // For each store:
1937  for (unsigned I = 0; I < State.MemDefs.size(); I++) {
1938  MemoryDef *KillingDef = State.MemDefs[I];
1939  if (State.SkipStores.count(KillingDef))
1940  continue;
1941  Instruction *KillingI = KillingDef->getMemoryInst();
1942 
1943  Optional<MemoryLocation> MaybeKillingLoc;
1944  if (State.isMemTerminatorInst(KillingI))
1945  MaybeKillingLoc = State.getLocForTerminator(KillingI).map(
1946  [](const std::pair<MemoryLocation, bool> &P) { return P.first; });
1947  else
1948  MaybeKillingLoc = State.getLocForWriteEx(KillingI);
1949 
1950  if (!MaybeKillingLoc) {
1951  LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
1952  << *KillingI << "\n");
1953  continue;
1954  }
1955  MemoryLocation KillingLoc = *MaybeKillingLoc;
1956  assert(KillingLoc.Ptr && "KillingLoc should not be null");
1957  const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
1958 
1959  MemoryAccess *Current = KillingDef;
1960  LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
1961  << *KillingDef << " (" << *KillingI << ")\n");
1962 
1963  unsigned ScanLimit = MemorySSAScanLimit;
1964  unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
1965  unsigned PartialLimit = MemorySSAPartialStoreLimit;
1966  // Worklist of MemoryAccesses that may be killed by KillingDef.
1967  SetVector<MemoryAccess *> ToCheck;
1968 
1969  if (KillingUndObj)
1970  ToCheck.insert(KillingDef->getDefiningAccess());
1971 
1972  bool Shortend = false;
1973  bool IsMemTerm = State.isMemTerminatorInst(KillingI);
1974  // Check if MemoryAccesses in the worklist are killed by KillingDef.
1975  for (unsigned I = 0; I < ToCheck.size(); I++) {
1976  Current = ToCheck[I];
1977  if (State.SkipStores.count(Current))
1978  continue;
1979 
1980  Optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
1981  KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
1982  WalkerStepLimit, IsMemTerm, PartialLimit);
1983 
1984  if (!MaybeDeadAccess) {
1985  LLVM_DEBUG(dbgs() << " finished walk\n");
1986  continue;
1987  }
1988 
1989  MemoryAccess *DeadAccess = *MaybeDeadAccess;
1990  LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
1991  if (isa<MemoryPhi>(DeadAccess)) {
1992  LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
1993  for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
1994  MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
1995  BasicBlock *IncomingBlock = IncomingAccess->getBlock();
1996  BasicBlock *PhiBlock = DeadAccess->getBlock();
1997 
1998  // We only consider incoming MemoryAccesses that come before the
1999  // MemoryPhi. Otherwise we could discover candidates that do not
2000  // strictly dominate our starting def.
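  // E.g. for a MemoryPhi in a loop header, the incoming access from the
  // preheader is added to the worklist, while the incoming access from the
  // latch (which does not come before the header) is skipped.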
2001  if (State.PostOrderNumbers[IncomingBlock] >
2002  State.PostOrderNumbers[PhiBlock])
2003  ToCheck.insert(IncomingAccess);
2004  }
2005  continue;
2006  }
2007  auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
2008  Instruction *DeadI = DeadDefAccess->getMemoryInst();
2009  LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
2010  ToCheck.insert(DeadDefAccess->getDefiningAccess());
2011  NumGetDomMemoryDefPassed++;
2012 
2013  if (!DebugCounter::shouldExecute(MemorySSACounter))
2014  continue;
2015 
2016  MemoryLocation DeadLoc = *State.getLocForWriteEx(DeadI);
2017 
2018  if (IsMemTerm) {
2019  const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
2020  if (KillingUndObj != DeadUndObj)
2021  continue;
2022  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2023  << "\n KILLER: " << *KillingI << '\n');
2024  State.deleteDeadInstruction(DeadI);
2025  ++NumFastStores;
2026  MadeChange = true;
2027  } else {
2028  // Check if DeadI overwrites KillingI.
2029  int64_t KillingOffset = 0;
2030  int64_t DeadOffset = 0;
2031  OverwriteResult OR = State.isOverwrite(
2032  KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
2033  if (OR == OW_MaybePartial) {
2034  auto Iter = State.IOLs.insert(
2035  std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
2036  DeadI->getParent(), InstOverlapIntervalsTy()));
2037  auto &IOL = Iter.first->second;
2038  OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
2039  DeadOffset, DeadI, IOL);
2040  }
2041 
2042  if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2043  auto *DeadSI = dyn_cast<StoreInst>(DeadI);
2044  auto *KillingSI = dyn_cast<StoreInst>(KillingI);
2045  // We are re-using tryToMergePartialOverlappingStores, which requires
2046  // DeadSI to dominate KillingSI.
2047  // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
2048  if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
2049  if (Constant *Merged = tryToMergePartialOverlappingStores(
2050  KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
2051  State.BatchAA, &DT)) {
2052 
2053  // Update stored value of earlier store to merged constant.
2054  DeadSI->setOperand(0, Merged);
2055  ++NumModifiedStores;
2056  MadeChange = true;
2057 
2058  Shortend = true;
2059  // Remove killing store and remove any outstanding overlap
2060  // intervals for the updated store.
2061  State.deleteDeadInstruction(KillingSI);
2062  auto I = State.IOLs.find(DeadSI->getParent());
2063  if (I != State.IOLs.end())
2064  I->second.erase(DeadSI);
2065  break;
2066  }
2067  }
2068  }
2069 
2070  if (OR == OW_Complete) {
2071  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2072  << "\n KILLER: " << *KillingI << '\n');
2073  State.deleteDeadInstruction(DeadI);
2074  ++NumFastStores;
2075  MadeChange = true;
2076  }
2077  }
2078  }
2079 
2080  // Check if the store is a no-op.
2081  if (!Shortend && isRemovable(KillingI) &&
2082  State.storeIsNoop(KillingDef, KillingUndObj)) {
2083  LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
2084  << '\n');
2085  State.deleteDeadInstruction(KillingI);
2086  NumRedundantStores++;
2087  MadeChange = true;
2088  continue;
2089  }
2090  }
2091 
2092  if (EnablePartialOverwriteTracking)
2093  for (auto &KV : State.IOLs)
2094  MadeChange |= removePartiallyOverlappedStores(State.DL, KV.second, TLI);
2095 
2096  MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2097  return MadeChange;
2098 }
2099 } // end anonymous namespace
2100 
2101 //===----------------------------------------------------------------------===//
2102 // DSE Pass
2103 //===----------------------------------------------------------------------===//
2104 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2105  AliasAnalysis &AA = AM.getResult<AAManager>(F);
2106  const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2107  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
2108  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2109  PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
2110  LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
2111 
2112  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2113 
2114 #ifdef LLVM_ENABLE_STATS
2115  if (AreStatisticsEnabled())
2116  for (auto &I : instructions(F))
2117  NumRemainingStores += isa<StoreInst>(&I);
2118 #endif
2119 
2120  if (!Changed)
2121  return PreservedAnalyses::all();
2122 
2123  PreservedAnalyses PA;
2124  PA.preserveSet<CFGAnalyses>();
2125  PA.preserve<MemorySSAAnalysis>();
2126  PA.preserve<LoopAnalysis>();
2127  return PA;
2128 }
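// Usage note: with the new pass manager this pass can be run on its own via,
// e.g., 'opt -passes=dse input.ll -S'.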
2129 
2130 namespace {
2131 
2132 /// A legacy pass for the legacy pass manager that wraps \c DSEPass.
2133 class DSELegacyPass : public FunctionPass {
2134 public:
2135  static char ID; // Pass identification, replacement for typeid
2136 
2137  DSELegacyPass() : FunctionPass(ID) {
2138  initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
2139  }
2140 
2141  bool runOnFunction(Function &F) override {
2142  if (skipFunction(F))
2143  return false;
2144 
2145  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2146  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2147  const TargetLibraryInfo &TLI =
2148  getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2149  MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
2150  PostDominatorTree &PDT =
2151  getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
2152  LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2153 
2154  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2155 
2156 #ifdef LLVM_ENABLE_STATS
2157  if (AreStatisticsEnabled())
2158  for (auto &I : instructions(F))
2159  NumRemainingStores += isa<StoreInst>(&I);
2160 #endif
2161 
2162  return Changed;
2163  }
2164 
2165  void getAnalysisUsage(AnalysisUsage &AU) const override {
2166  AU.setPreservesCFG();
2167  AU.addRequired<AAResultsWrapperPass>();
2168  AU.addRequired<TargetLibraryInfoWrapperPass>();
2169  AU.addPreserved<GlobalsAAWrapperPass>();
2170  AU.addRequired<DominatorTreeWrapperPass>();
2171  AU.addPreserved<DominatorTreeWrapperPass>();
2172  AU.addRequired<PostDominatorTreeWrapperPass>();
2173  AU.addRequired<MemorySSAWrapperPass>();
2174  AU.addPreserved<PostDominatorTreeWrapperPass>();
2175  AU.addPreserved<MemorySSAWrapperPass>();
2176  AU.addRequired<LoopInfoWrapperPass>();
2177  AU.addPreserved<LoopInfoWrapperPass>();
2178  }
2179 };
2180 
2181 } // end anonymous namespace
2182 
2183 char DSELegacyPass::ID = 0;
2184 
2185 INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
2186  false)
2187 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2188 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
2189 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2190 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
2191 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
2192 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
2193 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2194 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2195 INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
2196  false)
2197 
2198 FunctionPass *llvm::createDeadStoreEliminationPass() {
2199  return new DSELegacyPass();
2200 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
dse
dse
Definition: DeadStoreElimination.cpp:2195
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::BatchAAResults
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
Definition: AliasAnalysis.h:896
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1233
getPointerSize
static uint64_t getPointerSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, const Function *F)
Definition: DeadStoreElimination.cpp:312
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm::isAligned
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:138
llvm::AliasResult::PartialAlias
@ PartialAlias
The two locations alias, but only due to a partial overlap.
Definition: AliasAnalysis.h:104
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:103
MathExtras.h
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:37
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:468
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::isCallocLikeFn
bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates zero-filled memory (such as...
Definition: MemoryBuiltins.cpp:290
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:266
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:633
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:255
llvm::User::operands
op_range operands()
Definition: User.h:242
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::PHITransAddr
PHITransAddr - An address value which tracks and handles phi translation.
Definition: PHITransAddr.h:35
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:779
llvm::MemoryLocation::Ptr
const Value * Ptr
The address of the start of the location.
Definition: MemoryLocation.h:217
Scalar.h
InstIterator.h
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:52
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::emitCalloc
Value * emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, const TargetLibraryInfo &TLI)
Emit a call to the calloc function.
Definition: BuildLibCalls.cpp:1656
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
CaptureTracking.h
ErrorHandling.h
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:734
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:717
llvm::IRBuilder<>
MapVector.h
DeadStoreElimination.h
ValueTracking.h
Local.h
llvm::isAllocLikeFn
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc,...
Definition: MemoryBuiltins.cpp:305
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
GlobalsModRef.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::APInt::getBitsSet
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:243
APInt.h
MemorySSASameBBStepCost
static cl::opt< unsigned > MemorySSASameBBStepCost("dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden, cl::desc("The cost of a step in the same basic block as the killing MemoryDef" "(default = 1)"))
llvm::MemoryLocation::Size
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
Definition: MemoryLocation.h:226
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1403
Module.h
MemoryBuiltins.h
llvm::ObjectSizeOpts::NullIsUnknownSize
bool NullIsUnknownSize
If this is true, null pointers in address space 0 will be treated as though they can't be evaluated.
Definition: MemoryBuiltins.h:208
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1268
llvm::createDeadStoreEliminationPass
FunctionPass * createDeadStoreEliminationPass()
Definition: DeadStoreElimination.cpp:2198
llvm::Optional
Definition: APInt.h:33
llvm::IntervalMap::empty
bool empty() const
empty - Return true when no intervals are mapped.
Definition: IntervalMap.h:1055
llvm::PHITransAddr::IsPotentiallyPHITranslatable
bool IsPotentiallyPHITranslatable() const
IsPotentiallyPHITranslatable - If this needs PHI translation, return true if we have some hope of doi...
Definition: PHITransAddr.cpp:114
llvm::AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:81
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::Instruction::mayThrow
bool mayThrow() const
Return true if this instruction may throw an exception.
Definition: Instruction.cpp:673
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
tryToShortenBegin
static bool tryToShortenBegin(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
Definition: DeadStoreElimination.cpp:703
llvm::LocationSize::isPrecise
bool isPrecise() const
Definition: MemoryLocation.h:165
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:201
llvm::BatchAAResults::getModRefInfo
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
Definition: AliasAnalysis.h:908
MustExecute.h
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::MemSetBase::getValue
Value * getValue() const
Definition: IntrinsicInst.h:766
llvm::AtomicOrdering::Monotonic
@ Monotonic
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
AliasAnalysis.h
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:115
Instruction.h
CommandLine.h
MemorySSAScanLimit
static cl::opt< unsigned > MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 150)"))
llvm::StoreInst::getValueOperand
Value * getValueOperand()
Definition: Instructions.h:398
llvm::AreStatisticsEnabled
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:138
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:981
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AAResults
Definition: AliasAnalysis.h:456
PostDominators.h
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:74
llvm::MemorySSA::isLiveOnEntryDef
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:742
hasAnalyzableMemoryWrite
static bool hasAnalyzableMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI)
Does this instruction write some memory? This only returns true for things that we can analyze with o...
Definition: DeadStoreElimination.cpp:170
OverlapIntervalsTy
std::map< int64_t, int64_t > OverlapIntervalsTy
Definition: DeadStoreElimination.cpp:165
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:34
Intrinsics.h
InstrTypes.h
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1393
llvm::PostDominatorTreeWrapperPass
Definition: PostDominators.h:73
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LocationSize::getValue
uint64_t getValue() const
Definition: MemoryLocation.h:158
llvm::AliasResult::hasOffset
constexpr bool hasOffset() const
Definition: AliasAnalysis.h:117
TargetLibraryInfo.h
AssumeBundleBuilder.h
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:377
false
Definition: StackSlotColoring.cpp:142
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:289
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:145
llvm::Instruction
Definition: Instruction.h:45
llvm::DebugCounter::shouldExecute
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:74
Elimination
Dead Store Elimination
Definition: DeadStoreElimination.cpp:2195
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:900
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:73
llvm::getUnderlyingObject
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
Definition: ValueTracking.cpp:4377
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:214
SmallPtrSet.h
llvm::DominatorTreeBase::roots
iterator_range< root_iterator > roots()
Definition: GenericDomTree.h:304
PatternMatch.h
llvm::MemoryUseOrDef::getMemoryInst
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
Definition: MemorySSA.h:254
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::Instruction::mayWriteToMemory
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
Definition: Instruction.cpp:584
llvm::isModSet
LLVM_NODISCARD bool isModSet(const ModRefInfo MRI)
Definition: AliasAnalysis.h:196
llvm::getObjectSize
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
Definition: MemoryBuiltins.cpp:514
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false, false) INITIALIZE_PASS_END(DSELegacyPass
llvm::None
const NoneType None
Definition: None.h:23
llvm::MemoryAccess::getBlock
BasicBlock * getBlock() const
Definition: MemorySSA.h:159
memoryIsNotModifiedBetween
static bool memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, BatchAAResults &AA, const DataLayout &DL, DominatorTree *DT)
Returns true if the memory which is accessed by the second instruction is not modified between the fi...
Definition: DeadStoreElimination.cpp:490
isShortenableAtTheBeginning
static bool isShortenableAtTheBeginning(Instruction *I)
Returns true if the beginning of this instruction can be safely shortened in length.
Definition: DeadStoreElimination.cpp:306
llvm::MemSetInst
This class wraps the llvm.memset intrinsic.
Definition: IntrinsicInst.h:905
llvm::AliasResult::getOffset
constexpr int32_t getOffset() const
Definition: AliasAnalysis.h:118
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::MemorySSAWalker::getClobberingMemoryAccess
MemoryAccess * getClobberingMemoryAccess(const Instruction *I)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1041
BasicBlock.h
llvm::cl::opt< bool >
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:192
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
getLocForWrite
static MemoryLocation getLocForWrite(Instruction *Inst, const TargetLibraryInfo &TLI)
Return a Location stored to by the specified instruction.
Definition: DeadStoreElimination.cpp:211
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::MemoryLocation::getOrNone
static Optional< MemoryLocation > getOrNone(const Instruction *Inst)
Definition: MemoryLocation.cpp:78
llvm::BatchAAResults::alias
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
Definition: AliasAnalysis.h:902
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:463
uint64_t
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:56
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:176
MemoryLocation.h
llvm::DenseMap
Definition: DenseMap.h:714
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:705
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:967
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::MemorySSA::getMemoryAccess
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:722
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:402
llvm::MemoryDef
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:377
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:367
EnablePartialStoreMerging
static cl::opt< bool > EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE"))
llvm::GetElementPtrInst::CreateInBounds
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Definition: Instructions.h:990
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MemoryLocation::getForArgument
static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
Definition: MemoryLocation.cpp:131
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:328
llvm::TargetLibraryInfo::has
bool has(LibFunc F) const
Tests whether a library function is available.
Definition: TargetLibraryInfo.h:323
MemorySSAOtherBBStepCost
static cl::opt< unsigned > MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5), cl::Hidden, cl::desc("The cost of a step in a different basic " "block than the killing MemoryDef" "(default = 5)"))
isMaskedStoreOverwrite
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, const Instruction *DeadI, BatchAAResults &AA)
Check if two instruction are masked stores that completely overwrite one another.
Definition: DeadStoreElimination.cpp:340
llvm::salvageKnowledge
void salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
Definition: AssumeBundleBuilder.cpp:292
llvm::PHITransAddr::PHITranslateValue
bool PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
PHITranslateValue - PHI translate the current address up the CFG from CurBB to Pred,...
Definition: PHITransAddr.cpp:312
EnablePartialOverwriteTracking
static cl::opt< bool > EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden, cl::desc("Enable partial-overwrite tracking in DSE"))
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:931
llvm::MemorySSA::getWalker
MemorySSAWalker * getWalker()
Definition: MemorySSA.cpp:1598
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
MemorySSAPathCheckLimit
static cl::opt< unsigned > MemorySSAPathCheckLimit("dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, cl::desc("The maximum number of blocks to check when trying to prove that " "all paths to an exit go through a killing block (default = 50)"))
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:398
llvm::LoopInfo
Definition: LoopInfo.h:1083
tryToMergePartialOverlappingStores
static Constant * tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, int64_t KillingOffset, int64_t DeadOffset, const DataLayout &DL, BatchAAResults &AA, DominatorTree *DT)
Definition: DeadStoreElimination.cpp:755
llvm::PointerMayBeCaptured
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
Definition: CaptureTracking.cpp:155
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::IntervalMap::end
const_iterator end() const
Definition: IntervalMap.h:1112
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1558
DataLayout.h
llvm::MemorySSA::getSkipSelfWalker
MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1613
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::PostDominatorTree
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Definition: PostDominators.h:28
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:116
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:776
uint32_t
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::AliasResult::MustAlias
@ MustAlias
The two locations precisely alias each other.
Definition: AliasAnalysis.h:106
llvm::ObjectSizeOpts
Various options to control the behavior of getObjectSize.
Definition: MemoryBuiltins.h:188
llvm::DominatorTreeBase::properlyDominates
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Definition: GenericDomTree.h:392
llvm::Instruction::mayReadFromMemory
bool mayReadFromMemory() const
Return true if this instruction may read memory.
Definition: Instruction.cpp:564
llvm::MemoryAccess
Definition: MemorySSA.h:137
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:687
Argument.h
llvm::APInt::zext
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:950
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::Instruction::isAtomic
bool isAtomic() const
Return true if this instruction has an AtomicOrdering of unordered or higher.
Definition: Instruction.cpp:604
llvm::PHITransAddr::getAddr
Value * getAddr() const
Definition: PHITransAddr.h:59
Constant.h
llvm::BatchAAResults::isMustAlias
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
Definition: AliasAnalysis.h:927
tryToShorten
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd)
Definition: DeadStoreElimination.cpp:577
llvm::MemoryDependenceWrapperPass
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDepnedenceResults instance.
Definition: MemoryDependenceAnalysis.h:525
llvm::MemoryUseOrDef::getDefiningAccess
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:257
llvm::MemIntrinsicBase::getLength
Value * getLength() const
Definition: IntrinsicInst.h:643
get
Should compile to something r4 addze r3 instead we get
Definition: README.txt:24
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::initializeDSELegacyPassPass
void initializeDSELegacyPassPass(PassRegistry &)
llvm::DSEPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Definition: DeadStoreElimination.cpp:2104
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
MemorySSADefsPerBlockLimit
static cl::opt< unsigned > MemorySSADefsPerBlockLimit("dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, cl::desc("The number of MemoryDefs we consider as candidates to eliminated " "other stores per basic block (default = 5000)"))
Casting.h
llvm::post_order
iterator_range< po_iterator< T > > post_order(const T &G)
Definition: PostOrderIterator.h:188
Function.h
DebugCounter.h
PassManager.h
llvm::salvageDebugInfo
void salvageDebugInfo(Instruction &I)
Assuming the instruction I is going to be deleted, attempt to salvage debug users of I by writing the...
Definition: Local.cpp:1728
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
llvm::GetPointerBaseWithConstantOffset
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
Definition: ValueTracking.h:279
llvm::MemoryLocation::getWithNewPtr
MemoryLocation getWithNewPtr(const Value *NewPtr) const
Definition: MemoryLocation.h:292
MemorySSAPartialStoreLimit
static cl::opt< unsigned > MemorySSAPartialStoreLimit("dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, cl::desc("The maximum number candidates that only partially overwrite the " "killing MemoryDef to consider" " (default = 5)"))
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
removePartiallyOverlappedStores
static bool removePartiallyOverlappedStores(const DataLayout &DL, InstOverlapIntervalsTy &IOL, const TargetLibraryInfo &TLI)
Definition: DeadStoreElimination.cpp:732
llvm::isStrongerThan
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...
Definition: AtomicOrdering.h:90
DEBUG_COUNTER
DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa", "Controls which MemoryDefs are eliminated.")
llvm::RegState::Dead
@ Dead
Unused definition.
Definition: MachineInstrBuilder.h:50
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
MemorySSA.h
Instructions.h
PostOrderIterator.h
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:191
SmallVector.h
llvm::isRefSet
LLVM_NODISCARD bool isRefSet(const ModRefInfo MRI)
Definition: AliasAnalysis.h:199
tryToShortenEnd
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
Definition: DeadStoreElimination.cpp:676
Dominators.h
llvm::MemoryLocation::getAfter
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
Definition: MemoryLocation.h:267
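Usage sketch, assuming Ptr and OtherPtr are pointer Values in scope:
// Conservative location covering everything from Ptr to the end of its object.
MemoryLocation LocAfter = MemoryLocation::getAfter(Ptr);
// Rebase an existing location onto a different pointer, keeping size and AA tags.
MemoryLocation Rebased = LocAfter.getWithNewPtr(OtherPtr);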
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
llvm::IntervalMap::begin
const_iterator begin() const
Definition: IntervalMap.h:1100
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1281
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:143
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::isStrongerThanMonotonic
bool isStrongerThanMonotonic(AtomicOrdering AO)
Definition: AtomicOrdering.h:124
llvm::IntervalMap
Definition: IntervalMap.h:938
isPartialOverwrite
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, const MemoryLocation &DeadLoc, int64_t KillingOff, int64_t DeadOff, Instruction *DeadI, InstOverlapIntervalsTy &IOL)
Return 'OW_Complete' if a store to the 'KillingLoc' location completely overwrites a store to the 'De...
Definition: DeadStoreElimination.cpp:372
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
llvm::PostDominatorTreeAnalysis
Analysis pass which computes a PostDominatorTree.
Definition: PostDominators.h:47
deleteDeadInstruction
static void deleteDeadInstruction(Instruction *I)
Definition: LoopIdiomRecognize.cpp:348
llvm::CastInst::CreatePointerCast
static CastInst * CreatePointerCast(Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast instruction.
Definition: Instructions.cpp:3168
llvm::MemoryLocation::getForDest
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
Definition: MemoryLocation.cpp:113
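Usage sketch, assuming MemI is a MemIntrinsic * (e.g. a memset or memcpy call):
// Location written through the intrinsic's destination operand.
MemoryLocation DestLoc = MemoryLocation::getForDest(MemI);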
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
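Sketch of how a legacy pass declares the analyses it depends on; the class MyLegacyPass is hypothetical:
void MyLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<AAResultsWrapperPass>();     // request alias analysis
  AU.addRequired<DominatorTreeWrapperPass>(); // and the dominator tree
  AU.setPreservesCFG();                       // only instructions are removed
}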
LLVMContext.h
MemorySSAUpwardsStepLimit
static cl::opt< unsigned > MemorySSAUpwardsStepLimit("dse-memoryssa-walklimit", cl::init(90), cl::Hidden, cl::desc("The maximum number of steps while walking upwards to find " "MemoryDefs that may be killed (default = 90)"))
llvm::cl::desc
Definition: CommandLine.h:414
raw_ostream.h
llvm::NullPointerIsDefined
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:1965
llvm::PHITransAddr::NeedsPHITranslationFromBlock
bool NeedsPHITranslationFromBlock(BasicBlock *BB) const
NeedsPHITranslationFromBlock - Return true if moving from the specified BasicBlock to its predecessor...
Definition: PHITransAddr.h:63
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:388
llvm::PostDominatorTree::dominates
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
Definition: PostDominators.cpp:54
Value.h
InitializePasses.h
isShortenableAtTheEnd
static bool isShortenableAtTheEnd(Instruction *I)
Returns true if the end of this instruction can be safely shortened in length.
Definition: DeadStoreElimination.cpp:281
BuildLibCalls.h
llvm::offsetToAlignment
uint64_t offsetToAlignment(uint64_t Value, Align Alignment)
Returns the offset to the next integer (mod 2**64) that is greater than or equal to Value and is a mu...
Definition: Alignment.h:196
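A small arithmetic example of the padding it computes:
// 5 rounded up to a multiple of 8 is 8, so the padding is 3.
uint64_t Pad = offsetToAlignment(5, Align(8)); // Pad == 3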
llvm::mayContainIrreducibleControl
bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI)
Definition: MustExecute.cpp:482
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:438
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:209
isRemovable
static bool isRemovable(Instruction *I)
If the value of this instruction and the memory it writes to is unused, may we delete this instructio...
Definition: DeadStoreElimination.cpp:243
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1243
llvm::isFreeCall
const CallInst * isFreeCall(const Value *I, const TargetLibraryInfo *TLI)
isFreeCall - Returns non-null if the value is a call to the builtin free()
Definition: MemoryBuiltins.cpp:486
SetVector.h
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
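These members support the visited-set idiom common in worklist algorithms; a sketch with hypothetical Visited and Worklist containers:
SmallPtrSet<Instruction *, 8> Visited;
SmallVector<Instruction *, 8> Worklist;
while (!Worklist.empty()) {
  Instruction *I = Worklist.pop_back_val();
  // insert() returns {iterator, bool}; the bool is false if I was already seen.
  if (!Visited.insert(I).second)
    continue;
  // ... process I and push its users onto Worklist ...
}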
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37