1 //===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The code below implements dead store elimination using MemorySSA. It uses
10 // the following general approach: given a MemoryDef, walk upwards to find
11 // clobbering MemoryDefs that may be killed by the starting def. Then check
12 // that there are no uses that may read the location of the original MemoryDef
13 // in between both MemoryDefs. A bit more concretely:
14 //
15 // For all MemoryDefs StartDef:
16 // 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17 // upwards.
18 // 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19 // checking all uses starting at MaybeDeadAccess and walking until we see
20 // StartDef.
21 // 3. For each found CurrentDef, check that:
22 // 1. There are no barrier instructions between CurrentDef and StartDef (like
23 // throws or stores with ordering constraints).
24 // 2. StartDef is executed whenever CurrentDef is executed.
25 // 3. StartDef completely overwrites CurrentDef.
26 // 4. Erase CurrentDef from the function and MemorySSA.
27 //
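// For example (a minimal illustration, not a test from the LLVM tree), given
//
//   store i32 0, i32* %p
//   store i32 1, i32* %p
//
// the first store is dead: it is completely overwritten by the second store
// and there is no read of %p in between, so DSE erases it.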
28 //===----------------------------------------------------------------------===//
29 
31 #include "llvm/ADT/APInt.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/MapVector.h"
35 #include "llvm/ADT/SetVector.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallVector.h"
38 #include "llvm/ADT/Statistic.h"
39 #include "llvm/ADT/StringRef.h"
43 #include "llvm/Analysis/LoopInfo.h"
52 #include "llvm/IR/Argument.h"
53 #include "llvm/IR/BasicBlock.h"
54 #include "llvm/IR/Constant.h"
55 #include "llvm/IR/Constants.h"
56 #include "llvm/IR/DataLayout.h"
57 #include "llvm/IR/Dominators.h"
58 #include "llvm/IR/Function.h"
59 #include "llvm/IR/IRBuilder.h"
60 #include "llvm/IR/InstIterator.h"
61 #include "llvm/IR/InstrTypes.h"
62 #include "llvm/IR/Instruction.h"
63 #include "llvm/IR/Instructions.h"
64 #include "llvm/IR/IntrinsicInst.h"
65 #include "llvm/IR/Intrinsics.h"
66 #include "llvm/IR/LLVMContext.h"
67 #include "llvm/IR/Module.h"
68 #include "llvm/IR/PassManager.h"
69 #include "llvm/IR/PatternMatch.h"
70 #include "llvm/IR/Value.h"
71 #include "llvm/InitializePasses.h"
72 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
75 #include "llvm/Support/Debug.h"
80 #include "llvm/Transforms/Scalar.h"
84 #include <algorithm>
85 #include <cassert>
86 #include <cstddef>
87 #include <cstdint>
88 #include <iterator>
89 #include <map>
90 #include <utility>
91 
92 using namespace llvm;
93 using namespace PatternMatch;
94 
95 #define DEBUG_TYPE "dse"
96 
97 STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
98 STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
99 STATISTIC(NumFastStores, "Number of stores deleted");
100 STATISTIC(NumFastOther, "Number of other instrs removed");
101 STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
102 STATISTIC(NumModifiedStores, "Number of stores modified");
103 STATISTIC(NumCFGChecks, "Number of blocks checked when proving all paths reach a killing block");
104 STATISTIC(NumCFGTries, "Number of attempted CFG checks for killing blocks");
105 STATISTIC(NumCFGSuccess, "Number of successful CFG checks for killing blocks");
106 STATISTIC(NumGetDomMemoryDefPassed,
107  "Number of times a valid candidate is returned from getDomMemoryDef");
108 STATISTIC(NumDomMemDefChecks,
109  "Number iterations check for reads in getDomMemoryDef");
110 
111 DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
112  "Controls which MemoryDefs are eliminated.");
113 
114 static cl::opt<bool>
115 EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
116  cl::init(true), cl::Hidden,
117  cl::desc("Enable partial-overwrite tracking in DSE"));
118 
119 static cl::opt<bool>
120 EnablePartialStoreMerging("enable-dse-partial-store-merging",
121  cl::init(true), cl::Hidden,
122  cl::desc("Enable partial store merging in DSE"));
123 
124 static cl::opt<unsigned>
125  MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
126  cl::desc("The number of memory instructions to scan for "
127  "dead store elimination (default = 150)"));
129  "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
130  cl::desc("The maximum number of steps while walking upwards to find "
131  "MemoryDefs that may be killed (default = 90)"));
132 
134  "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
135  cl::desc("The maximum number candidates that only partially overwrite the "
136  "killing MemoryDef to consider"
137  " (default = 5)"));
138 
140  "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
141  cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
142  "other stores per basic block (default = 5000)"));
143 
145  "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
146  cl::desc(
147  "The cost of a step in the same basic block as the killing MemoryDef"
148  "(default = 1)"));
149 
150 static cl::opt<unsigned>
151  MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
152  cl::Hidden,
153  cl::desc("The cost of a step in a different basic "
154  "block than the killing MemoryDef"
155  "(default = 5)"));
156 
158  "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
159  cl::desc("The maximum number of blocks to check when trying to prove that "
160  "all paths to an exit go through a killing block (default = 50)"));
161 
162 // This flag allows or disallows DSE to optimize MemorySSA during its
163 // traversal. Note that DSE optimizing MemorySSA may impact other passes
164 // downstream of the DSE invocation and can lead to issues not being
165 // reproducible in isolation (i.e. when MemorySSA is built from scratch). In
166 // those cases, the flag can be used to check if DSE's MemorySSA optimizations
167 // impact follow-up passes.
168 static cl::opt<bool>
169  OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
170  cl::desc("Allow DSE to optimize memory accesses."));
171 
172 //===----------------------------------------------------------------------===//
173 // Helper functions
174 //===----------------------------------------------------------------------===//
175 using OverlapIntervalsTy = std::map<int64_t, int64_t>;
176 using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
177 
178 /// Returns true if the end of this instruction can be safely shortened in
179 /// length.
180 static bool isShortenableAtTheEnd(Instruction *I) {
181   // Don't shorten stores for now
182  if (isa<StoreInst>(I))
183  return false;
184 
185  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
186  switch (II->getIntrinsicID()) {
187  default: return false;
188  case Intrinsic::memset:
189  case Intrinsic::memcpy:
190  case Intrinsic::memcpy_element_unordered_atomic:
191  case Intrinsic::memset_element_unordered_atomic:
192  // Do shorten memory intrinsics.
193  // FIXME: Add memmove if it's also safe to transform.
194  return true;
195  }
196  }
197 
198  // Don't shorten libcalls calls for now.
199 
200  return false;
201 }
202 
203 /// Returns true if the beginning of this instruction can be safely shortened
204 /// in length.
206  // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
207  // easily done by offsetting the source address.
208  return isa<AnyMemSetInst>(I);
209 }
210 
211 static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
212  const TargetLibraryInfo &TLI,
213  const Function *F) {
214  uint64_t Size;
215   ObjectSizeOpts Opts;
216   Opts.NullIsUnknownSize = NullPointerIsDefined(F);
217 
218   if (getObjectSize(V, Size, DL, &TLI, Opts))
219     return Size;
220   return MemoryLocation::UnknownSize;
221 }
222 
223 namespace {
224 
225 enum OverwriteResult {
226  OW_Begin,
227  OW_Complete,
228  OW_End,
229  OW_PartialEarlierWithFullLater,
230  OW_MaybePartial,
231  OW_None,
232  OW_Unknown
233 };
234 
235 } // end anonymous namespace
236 
237 /// Check if two instructions are masked stores that completely
238 /// overwrite one another. More specifically, \p KillingI has to
239 /// overwrite \p DeadI.
240 static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
241  const Instruction *DeadI,
242  BatchAAResults &AA) {
243  const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
244  const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
245  if (KillingII == nullptr || DeadII == nullptr)
246  return OW_Unknown;
247  if (KillingII->getIntrinsicID() != Intrinsic::masked_store ||
248  DeadII->getIntrinsicID() != Intrinsic::masked_store)
249  return OW_Unknown;
250  // Pointers.
251  Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
252  Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
253  if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
254  return OW_Unknown;
255  // Masks.
256  // TODO: check that KillingII's mask is a superset of the DeadII's mask.
257  if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
258  return OW_Unknown;
259  return OW_Complete;
260 }
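
// A minimal illustration (hypothetical IR, not from a test): the second masked
// store below completely overwrites the first one, because both the pointer
// and the mask operands are identical:
//
//   call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %a, <4 x i32>* %p,
//                                              i32 4, <4 x i1> %m)
//   call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %b, <4 x i32>* %p,
//                                              i32 4, <4 x i1> %m)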
261 
262 /// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
263 /// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
264 /// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
265 /// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
266 /// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
267 /// overwritten by a killing (smaller) store which doesn't write outside the big
268 /// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
269 /// NOTE: This function must only be called if both \p KillingLoc and \p
270 /// DeadLoc belong to the same underlying object with valid \p KillingOff and
271 /// \p DeadOff.
272 static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
273  const MemoryLocation &DeadLoc,
274  int64_t KillingOff, int64_t DeadOff,
275  Instruction *DeadI,
276  InstOverlapIntervalsTy &IOL) {
277  const uint64_t KillingSize = KillingLoc.Size.getValue();
278  const uint64_t DeadSize = DeadLoc.Size.getValue();
279  // We may now overlap, although the overlap is not complete. There might also
280  // be other incomplete overlaps, and together, they might cover the complete
281  // dead store.
282  // Note: The correctness of this logic depends on the fact that this function
283  // is not even called providing DepWrite when there are any intervening reads.
284   if (EnablePartialOverwriteTracking &&
285       KillingOff < int64_t(DeadOff + DeadSize) &&
286  int64_t(KillingOff + KillingSize) >= DeadOff) {
287 
288  // Insert our part of the overlap into the map.
289  auto &IM = IOL[DeadI];
290  LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
291  << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
292  << KillingOff << ", " << int64_t(KillingOff + KillingSize)
293  << ")\n");
294 
295  // Make sure that we only insert non-overlapping intervals and combine
296  // adjacent intervals. The intervals are stored in the map with the ending
297  // offset as the key (in the half-open sense) and the starting offset as
298  // the value.
299  int64_t KillingIntStart = KillingOff;
300  int64_t KillingIntEnd = KillingOff + KillingSize;
301 
302  // Find any intervals ending at, or after, KillingIntStart which start
303  // before KillingIntEnd.
304  auto ILI = IM.lower_bound(KillingIntStart);
305  if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
306  // This existing interval is overlapped with the current store somewhere
307  // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
308  // intervals and adjusting our start and end.
309  KillingIntStart = std::min(KillingIntStart, ILI->second);
310  KillingIntEnd = std::max(KillingIntEnd, ILI->first);
311  ILI = IM.erase(ILI);
312 
313  // Continue erasing and adjusting our end in case other previous
314  // intervals are also overlapped with the current store.
315  //
316  // |--- dead 1 ---| |--- dead 2 ---|
317  // |------- killing---------|
318  //
319  while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
320  assert(ILI->second > KillingIntStart && "Unexpected interval");
321  KillingIntEnd = std::max(KillingIntEnd, ILI->first);
322  ILI = IM.erase(ILI);
323  }
324  }
325 
326  IM[KillingIntEnd] = KillingIntStart;
327 
328  ILI = IM.begin();
329  if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
330  LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
331  << DeadOff << ", " << int64_t(DeadOff + DeadSize)
332  << ") Composite KillingLoc [" << ILI->second << ", "
333  << ILI->first << ")\n");
334  ++NumCompletePartials;
335  return OW_Complete;
336  }
337  }
338 
339  // Check for a dead store which writes to all the memory locations that
340  // the killing store writes to.
341  if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
342  int64_t(DeadOff + DeadSize) > KillingOff &&
343  uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
344     LLVM_DEBUG(dbgs() << "DSE: Partial overwrite of a dead store [" << DeadOff
345  << ", " << int64_t(DeadOff + DeadSize)
346  << ") by a killing store [" << KillingOff << ", "
347  << int64_t(KillingOff + KillingSize) << ")\n");
348  // TODO: Maybe come up with a better name?
349  return OW_PartialEarlierWithFullLater;
350  }
351 
352  // Another interesting case is if the killing store overwrites the end of the
353  // dead store.
354  //
355  // |--dead--|
356  // |-- killing --|
357  //
358   // In this case we may want to trim the size of the dead store to avoid
359   // generating stores to addresses which will definitely be overwritten by
360   // the killing store.
361   if (!EnablePartialOverwriteTracking &&
362       (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
363  int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
364  return OW_End;
365 
366  // Finally, we also need to check if the killing store overwrites the
367  // beginning of the dead store.
368  //
369  // |--dead--|
370  // |-- killing --|
371  //
372   // In this case we may want to move the destination address and trim the
373   // size of the dead store to avoid generating stores to addresses which will
374   // definitely be overwritten by the killing store.
375   if (!EnablePartialOverwriteTracking &&
376       (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
377  assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
378  "Expect to be handled as OW_Complete");
379  return OW_Begin;
380  }
381  // Otherwise, they don't completely overlap.
382  return OW_Unknown;
383 }
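
// Worked example for the interval tracking above (editor's illustration with
// hypothetical offsets): a dead store covering bytes [0, 8) that is later hit
// by killing stores covering [0, 4) and then [4, 8) is first recorded in the
// interval map as {4: 0} and then merged into {8: 0}. The composite interval
// [0, 8) covers the whole dead store, so the second call returns OW_Complete.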
384 
385 /// Returns true if the memory which is accessed by the second instruction is not
386 /// modified between the first and the second instruction.
387 /// Precondition: Second instruction must be dominated by the first
388 /// instruction.
389 static bool
390 memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
391                            BatchAAResults &AA, const DataLayout &DL,
392  DominatorTree *DT) {
393  // Do a backwards scan through the CFG from SecondI to FirstI. Look for
394  // instructions which can modify the memory location accessed by SecondI.
395  //
396  // While doing the walk keep track of the address to check. It might be
397  // different in different basic blocks due to PHI translation.
398  using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
399   SmallVector<BlockAddressPair, 16> WorkList;
400   // Keep track of the address we visited each block with. Bail out if we
401   // visit a block with different addresses.
402   DenseMap<BasicBlock *, Value *> Visited;
403 
404  BasicBlock::iterator FirstBBI(FirstI);
405  ++FirstBBI;
406  BasicBlock::iterator SecondBBI(SecondI);
407  BasicBlock *FirstBB = FirstI->getParent();
408  BasicBlock *SecondBB = SecondI->getParent();
409  MemoryLocation MemLoc;
410  if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
411  MemLoc = MemoryLocation::getForDest(MemSet);
412  else
413  MemLoc = MemoryLocation::get(SecondI);
414 
415  auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
416 
417  // Start checking the SecondBB.
418  WorkList.push_back(
419  std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
420  bool isFirstBlock = true;
421 
422  // Check all blocks going backward until we reach the FirstBB.
423  while (!WorkList.empty()) {
424  BlockAddressPair Current = WorkList.pop_back_val();
425  BasicBlock *B = Current.first;
426  PHITransAddr &Addr = Current.second;
427  Value *Ptr = Addr.getAddr();
428 
429  // Ignore instructions before FirstI if this is the FirstBB.
430  BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
431 
431 
432     BasicBlock::iterator EI;
433     if (isFirstBlock) {
434  // Ignore instructions after SecondI if this is the first visit of SecondBB.
435  assert(B == SecondBB && "first block is not the store block");
436  EI = SecondBBI;
437  isFirstBlock = false;
438  } else {
439  // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
440  // In this case we also have to look at instructions after SecondI.
441  EI = B->end();
442  }
443  for (; BI != EI; ++BI) {
444  Instruction *I = &*BI;
445  if (I->mayWriteToMemory() && I != SecondI)
446  if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
447  return false;
448  }
449  if (B != FirstBB) {
450  assert(B != &FirstBB->getParent()->getEntryBlock() &&
451  "Should not hit the entry block because SI must be dominated by LI");
452  for (BasicBlock *Pred : predecessors(B)) {
453  PHITransAddr PredAddr = Addr;
454  if (PredAddr.NeedsPHITranslationFromBlock(B)) {
455  if (!PredAddr.IsPotentiallyPHITranslatable())
456  return false;
457  if (PredAddr.PHITranslateValue(B, Pred, DT, false))
458  return false;
459  }
460  Value *TranslatedPtr = PredAddr.getAddr();
461  auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
462  if (!Inserted.second) {
463  // We already visited this block before. If it was with a different
464  // address - bail out!
465  if (TranslatedPtr != Inserted.first->second)
466  return false;
467  // ... otherwise just skip it.
468  continue;
469  }
470  WorkList.push_back(std::make_pair(Pred, PredAddr));
471  }
472  }
473  }
474  return true;
475 }
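
// A minimal illustration (hypothetical IR): given
//
//   store i32 0, i32* %p        ; FirstI
//   call void @unknown()
//   store i32 1, i32* %p        ; SecondI
//
// the helper returns false, because the opaque call between the two stores may
// modify the location %p accessed by SecondI; without the call it returns true.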
476 
477 static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
478  uint64_t &DeadSize, int64_t KillingStart,
479  uint64_t KillingSize, bool IsOverwriteEnd) {
480  auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
481  Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
482 
483   // We assume that memset/memcpy operates in chunks of the "largest" native
484   // type size, aligned on that same value, so the optimal start and size of a
485   // memset/memcpy should be a multiple of the preferred alignment of that
486   // type. There is therefore no point in trying to reduce the store size any
487   // further, since any "extra" stores come for free anyway.
488   // On the other hand, the maximum alignment we can achieve is limited by the
489   // alignment of the initial store.
490 
491  // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
492  // "largest" native type.
493  // Note: What is the proper way to get that value?
494  // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
495  // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
496 
497  int64_t ToRemoveStart = 0;
498  uint64_t ToRemoveSize = 0;
499  // Compute start and size of the region to remove. Make sure 'PrefAlign' is
500  // maintained on the remaining store.
501  if (IsOverwriteEnd) {
502  // Calculate required adjustment for 'KillingStart' in order to keep
503     // remaining store size aligned on 'PrefAlign'.
504  uint64_t Off =
505  offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
506  ToRemoveStart = KillingStart + Off;
507  if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
508  return false;
509  ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
510  } else {
511  ToRemoveStart = DeadStart;
512  assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
513  "Not overlapping accesses?");
514  ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
515     // Calculate required adjustment for 'ToRemoveSize' in order to keep the
516     // start of the remaining store aligned on 'PrefAlign'.
517  uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
518  if (Off != 0) {
519  if (ToRemoveSize <= (PrefAlign.value() - Off))
520  return false;
521  ToRemoveSize -= PrefAlign.value() - Off;
522  }
523  assert(isAligned(PrefAlign, ToRemoveSize) &&
524  "Should preserve selected alignment");
525  }
526 
527  assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
528  assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
529 
530  uint64_t NewSize = DeadSize - ToRemoveSize;
531  if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
532  // When shortening an atomic memory intrinsic, the newly shortened
533  // length must remain an integer multiple of the element size.
534  const uint32_t ElementSize = AMI->getElementSizeInBytes();
535  if (0 != NewSize % ElementSize)
536  return false;
537  }
538 
539  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
540  << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
541  << "\n KILLER [" << ToRemoveStart << ", "
542  << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
543 
544  Value *DeadWriteLength = DeadIntrinsic->getLength();
545  Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
546  DeadIntrinsic->setLength(TrimmedLength);
547  DeadIntrinsic->setDestAlignment(PrefAlign);
548 
549  if (!IsOverwriteEnd) {
550  Value *OrigDest = DeadIntrinsic->getRawDest();
551  Type *Int8PtrTy =
552  Type::getInt8PtrTy(DeadIntrinsic->getContext(),
553  OrigDest->getType()->getPointerAddressSpace());
554  Value *Dest = OrigDest;
555  if (OrigDest->getType() != Int8PtrTy)
556  Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", DeadI);
557  Value *Indices[1] = {
558  ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
559     Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
560         Type::getInt8Ty(DeadIntrinsic->getContext()), Dest, Indices, "", DeadI);
561  NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
562  if (NewDestGEP->getType() != OrigDest->getType())
563  NewDestGEP = CastInst::CreatePointerCast(NewDestGEP, OrigDest->getType(),
564  "", DeadI);
565  DeadIntrinsic->setDest(NewDestGEP);
566  }
567 
568  // Finally update start and size of dead access.
569  if (!IsOverwriteEnd)
570  DeadStart += ToRemoveSize;
571  DeadSize = NewSize;
572 
573  return true;
574 }
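
// A worked example (editor's illustration): for a dead memset(p, 0, 32) whose
// last 16 bytes are later overwritten, tryToShorten rewrites the intrinsic to
// memset(p, 0, 16) (IsOverwriteEnd == true). If instead the first 16 bytes are
// overwritten, it advances the destination pointer by 16 and keeps the
// remaining 16 bytes, subject to the alignment adjustments computed above.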
575 
576 static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
577                             int64_t &DeadStart, uint64_t &DeadSize) {
578  if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
579  return false;
580 
581  OverlapIntervalsTy::iterator OII = --IntervalMap.end();
582  int64_t KillingStart = OII->second;
583  uint64_t KillingSize = OII->first - KillingStart;
584 
585  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
586 
587  if (KillingStart > DeadStart &&
588       // Note: "KillingStart - DeadStart" is known to be positive due to
589  // preceding check.
590  (uint64_t)(KillingStart - DeadStart) < DeadSize &&
591  // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
592  // be non negative due to preceding checks.
593  KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
594  if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
595  true)) {
596  IntervalMap.erase(OII);
597  return true;
598  }
599  }
600  return false;
601 }
602 
603 static bool tryToShortenBegin(Instruction *DeadI,
604                               OverlapIntervalsTy &IntervalMap,
605                               int64_t &DeadStart, uint64_t &DeadSize) {
606   if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
607     return false;
608 
609  OverlapIntervalsTy::iterator OII = IntervalMap.begin();
610  int64_t KillingStart = OII->second;
611  uint64_t KillingSize = OII->first - KillingStart;
612 
613  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
614 
615  if (KillingStart <= DeadStart &&
616  // Note: "DeadStart - KillingStart" is known to be non negative due to
617  // preceding check.
618  KillingSize > (uint64_t)(DeadStart - KillingStart)) {
619     // Note: "KillingSize - (uint64_t)(DeadStart - KillingStart)" is known to
620  // be positive due to preceding checks.
621  assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
622  "Should have been handled as OW_Complete");
623  if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
624  false)) {
625  IntervalMap.erase(OII);
626  return true;
627  }
628  }
629  return false;
630 }
631 
632 static Constant *
633 tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
634                                    int64_t KillingOffset, int64_t DeadOffset,
635  const DataLayout &DL, BatchAAResults &AA,
636  DominatorTree *DT) {
637 
638  if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
639  DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
640  KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
641  DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
642  memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
643  // If the store we find is:
644  // a) partially overwritten by the store to 'Loc'
645  // b) the killing store is fully contained in the dead one and
646  // c) they both have a constant value
647  // d) none of the two stores need padding
648  // Merge the two stores, replacing the dead store's value with a
649  // merge of both values.
650  // TODO: Deal with other constant types (vectors, etc), and probably
651  // some mem intrinsics (if needed)
652 
653  APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
654  APInt KillingValue =
655  cast<ConstantInt>(KillingI->getValueOperand())->getValue();
656  unsigned KillingBits = KillingValue.getBitWidth();
657  assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
658  KillingValue = KillingValue.zext(DeadValue.getBitWidth());
659 
660  // Offset of the smaller store inside the larger store
661  unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
662  unsigned LShiftAmount =
663  DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
664  : BitOffsetDiff;
665  APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
666  LShiftAmount + KillingBits);
667  // Clear the bits we'll be replacing, then OR with the smaller
668  // store, shifted appropriately.
669  APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
670  LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
671  << "\n Killing: " << *KillingI
672  << "\n Merged Value: " << Merged << '\n');
673  return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
674  }
675  return nullptr;
676 }
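
// A worked example (editor's illustration, little-endian, hypothetical values):
// a dead `store i32 0x11223344, i32* %p` followed by a killing `store i8 0xAA`
// to byte offset 1 of the same object merges into `store i32 0x1122AA44`:
// BitOffsetDiff is 8, so the mask clears bits [8, 16) of the dead value and the
// zero-extended killing value is OR'ed in after a left shift by 8.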
677 
678 namespace {
679 // Returns true if \p I is an intrinsic that does not read or write memory.
680 bool isNoopIntrinsic(Instruction *I) {
681  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
682  switch (II->getIntrinsicID()) {
683  case Intrinsic::lifetime_start:
684  case Intrinsic::lifetime_end:
685  case Intrinsic::invariant_end:
686  case Intrinsic::launder_invariant_group:
687  case Intrinsic::assume:
688  return true;
689  case Intrinsic::dbg_addr:
690  case Intrinsic::dbg_declare:
691  case Intrinsic::dbg_label:
692  case Intrinsic::dbg_value:
693  llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
694  default:
695  return false;
696  }
697  }
698  return false;
699 }
700 
701 // Check if we can ignore \p D for DSE.
702 bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
703  Instruction *DI = D->getMemoryInst();
704  // Calls that only access inaccessible memory cannot read or write any memory
705  // locations we consider for elimination.
706  if (auto *CB = dyn_cast<CallBase>(DI))
707  if (CB->onlyAccessesInaccessibleMemory())
708  return true;
709 
710  // We can eliminate stores to locations not visible to the caller across
711  // throwing instructions.
712  if (DI->mayThrow() && !DefVisibleToCaller)
713  return true;
714 
715  // We can remove the dead stores, irrespective of the fence and its ordering
716   // (release/acquire/seq_cst). Fences only constrain the ordering of
717   // already visible stores; they do not make a store visible to other
718   // threads. So, skipping over a fence does not change a store from being
719  // dead.
720  if (isa<FenceInst>(DI))
721  return true;
722 
723  // Skip intrinsics that do not really read or modify memory.
724  if (isNoopIntrinsic(DI))
725  return true;
726 
727  return false;
728 }
729 
730 struct DSEState {
731  Function &F;
732  AliasAnalysis &AA;
733   EarliestEscapeInfo EI;
734 
735  /// The single BatchAA instance that is used to cache AA queries. It will
736  /// not be invalidated over the whole run. This is safe, because:
737  /// 1. Only memory writes are removed, so the alias cache for memory
738  /// locations remains valid.
739  /// 2. No new instructions are added (only instructions removed), so cached
740  /// information for a deleted value cannot be accessed by a re-used new
741  /// value pointer.
742  BatchAAResults BatchAA;
743 
744  MemorySSA &MSSA;
745  DominatorTree &DT;
746  PostDominatorTree &PDT;
747  const TargetLibraryInfo &TLI;
748  const DataLayout &DL;
749  const LoopInfo &LI;
750 
751  // Whether the function contains any irreducible control flow, useful for
752  // being accurately able to detect loops.
753  bool ContainsIrreducibleLoops;
754 
755  // All MemoryDefs that potentially could kill other MemDefs.
756   SmallVector<MemoryDef *, 64> MemDefs;
757   // Any that should be skipped as they are already deleted
758   SmallPtrSet<MemoryAccess *, 4> SkipStores;
759  // Keep track of all of the objects that are invisible to the caller before
760  // the function returns.
761  DenseMap<const Value *, bool> InvisibleToCallerBeforeRet;
762  // Keep track of all of the objects that are invisible to the caller after
763  // the function returns.
764  DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
765  // Keep track of blocks with throwing instructions not modeled in MemorySSA.
766  SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
767  // Post-order numbers for each basic block. Used to figure out if memory
768  // accesses are executed before another access.
769  DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
770 
771  /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
772  /// basic block.
773   MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
774 
775  // Class contains self-reference, make sure it's not copied/moved.
776  DSEState(const DSEState &) = delete;
777  DSEState &operator=(const DSEState &) = delete;
778 
779  DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
780  PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
781  const LoopInfo &LI)
782  : F(F), AA(AA), EI(DT, LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
783  PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
784  // Collect blocks with throwing instructions not modeled in MemorySSA and
785  // alloc-like objects.
786  unsigned PO = 0;
787  for (BasicBlock *BB : post_order(&F)) {
788  PostOrderNumbers[BB] = PO++;
789  for (Instruction &I : *BB) {
790  MemoryAccess *MA = MSSA.getMemoryAccess(&I);
791  if (I.mayThrow() && !MA)
792  ThrowingBlocks.insert(I.getParent());
793 
794  auto *MD = dyn_cast_or_null<MemoryDef>(MA);
795  if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
796  (getLocForWrite(&I) || isMemTerminatorInst(&I)))
797  MemDefs.push_back(MD);
798  }
799  }
800 
801     // Treat byval or inalloca arguments the same as Allocas: stores to them
802     // are dead at the end of the function.
803  for (Argument &AI : F.args())
804  if (AI.hasPassPointeeByValueCopyAttr()) {
805  // For byval, the caller doesn't know the address of the allocation.
806  if (AI.hasByValAttr())
807  InvisibleToCallerBeforeRet.insert({&AI, true});
808  InvisibleToCallerAfterRet.insert({&AI, true});
809  }
810 
811  // Collect whether there is any irreducible control flow in the function.
812  ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
813  }
814 
815  /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
816  /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
817  /// location (by \p DeadI instruction).
818  /// Return OW_MaybePartial if \p KillingI does not completely overwrite
819  /// \p DeadI, but they both write to the same underlying object. In that
820  /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
821   /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite the
822  /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
823  OverwriteResult isOverwrite(const Instruction *KillingI,
824  const Instruction *DeadI,
825  const MemoryLocation &KillingLoc,
826  const MemoryLocation &DeadLoc,
827  int64_t &KillingOff, int64_t &DeadOff) {
828  // AliasAnalysis does not always account for loops. Limit overwrite checks
829  // to dependencies for which we can guarantee they are independent of any
830  // loops they are in.
831  if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
832  return OW_Unknown;
833 
834  const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
835  const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
836  const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
837  const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
838 
839  // Check whether the killing store overwrites the whole object, in which
840  // case the size/offset of the dead store does not matter.
841  if (DeadUndObj == KillingUndObj && KillingLoc.Size.isPrecise()) {
842  uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
843  if (KillingUndObjSize != MemoryLocation::UnknownSize &&
844  KillingUndObjSize == KillingLoc.Size.getValue())
845  return OW_Complete;
846  }
847 
848  // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
849  // get imprecise values here, though (except for unknown sizes).
850  if (!KillingLoc.Size.isPrecise() || !DeadLoc.Size.isPrecise()) {
851     // In case no constant size is known, try to compare the IR values for the
852     // number of bytes written and check if they match.
853  const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
854  const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
855  if (KillingMemI && DeadMemI) {
856  const Value *KillingV = KillingMemI->getLength();
857  const Value *DeadV = DeadMemI->getLength();
858  if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
859  return OW_Complete;
860  }
861 
862  // Masked stores have imprecise locations, but we can reason about them
863  // to some extent.
864  return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
865  }
866 
867  const uint64_t KillingSize = KillingLoc.Size.getValue();
868  const uint64_t DeadSize = DeadLoc.Size.getValue();
869 
870  // Query the alias information
871  AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
872 
873  // If the start pointers are the same, we just have to compare sizes to see if
874  // the killing store was larger than the dead store.
875  if (AAR == AliasResult::MustAlias) {
876  // Make sure that the KillingSize size is >= the DeadSize size.
877  if (KillingSize >= DeadSize)
878  return OW_Complete;
879  }
880 
881  // If we hit a partial alias we may have a full overwrite
882  if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
883  int32_t Off = AAR.getOffset();
884  if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
885  return OW_Complete;
886  }
887 
888  // If we can't resolve the same pointers to the same object, then we can't
889  // analyze them at all.
890  if (DeadUndObj != KillingUndObj) {
891  // Non aliasing stores to different objects don't overlap. Note that
892  // if the killing store is known to overwrite whole object (out of
893  // bounds access overwrites whole object as well) then it is assumed to
894  // completely overwrite any store to the same object even if they don't
895  // actually alias (see next check).
896  if (AAR == AliasResult::NoAlias)
897  return OW_None;
898  return OW_Unknown;
899  }
900 
901     // Okay, we have stores to the same object via two different pointers. Try
902     // to decompose each pointer into a "base + constant_offset" form. If the
903     // base pointers are equal, then we can reason about the two stores.
904  DeadOff = 0;
905  KillingOff = 0;
906  const Value *DeadBasePtr =
907  GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
908  const Value *KillingBasePtr =
909  GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
910 
911  // If the base pointers still differ, we have two completely different
912  // stores.
913  if (DeadBasePtr != KillingBasePtr)
914  return OW_Unknown;
915 
916  // The killing access completely overlaps the dead store if and only if
917  // both start and end of the dead one is "inside" the killing one:
918  // |<->|--dead--|<->|
919  // |-----killing------|
920  // Accesses may overlap if and only if start of one of them is "inside"
921  // another one:
922  // |<->|--dead--|<-------->|
923  // |-------killing--------|
924  // OR
925  // |-------dead-------|
926  // |<->|---killing---|<----->|
927  //
928  // We have to be careful here as *Off is signed while *.Size is unsigned.
929 
930  // Check if the dead access starts "not before" the killing one.
931  if (DeadOff >= KillingOff) {
932  // If the dead access ends "not after" the killing access then the
933  // dead one is completely overwritten by the killing one.
934  if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
935  return OW_Complete;
936  // If start of the dead access is "before" end of the killing access
937  // then accesses overlap.
938  else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
939  return OW_MaybePartial;
940  }
941  // If start of the killing access is "before" end of the dead access then
942  // accesses overlap.
943  else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
944  return OW_MaybePartial;
945  }
946 
947  // Can reach here only if accesses are known not to overlap.
948  return OW_None;
949  }
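
  // Worked examples for the offset-based reasoning above (editor's
  // illustration): with DeadOff = 4, DeadSize = 4, KillingOff = 0 and
  // KillingSize = 16, the dead store lies entirely inside the killing one and
  // the result is OW_Complete. With DeadOff = 0, DeadSize = 8, KillingOff = 4
  // and KillingSize = 8, the accesses only overlap in [4, 8), so the result is
  // OW_MaybePartial and isPartialOverwrite decides the rest.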
950 
951  bool isInvisibleToCallerAfterRet(const Value *V) {
952  if (isa<AllocaInst>(V))
953  return true;
954  auto I = InvisibleToCallerAfterRet.insert({V, false});
955  if (I.second) {
956  if (!isInvisibleToCallerBeforeRet(V)) {
957  I.first->second = false;
958  } else if (isNoAliasCall(V)) {
959  I.first->second = !PointerMayBeCaptured(V, true, false);
960  }
961  }
962  return I.first->second;
963  }
964 
965  bool isInvisibleToCallerBeforeRet(const Value *V) {
966  if (isa<AllocaInst>(V))
967  return true;
968  auto I = InvisibleToCallerBeforeRet.insert({V, false});
969  if (I.second && isNoAliasCall(V))
970  // NOTE: This could be made more precise by PointerMayBeCapturedBefore
971  // with the killing MemoryDef. But we refrain from doing so for now to
972  // limit compile-time and this does not cause any changes to the number
973  // of stores removed on a large test set in practice.
974  I.first->second = !PointerMayBeCaptured(V, false, true);
975  return I.first->second;
976  }
977 
978  Optional<MemoryLocation> getLocForWrite(Instruction *I) const {
979  if (!I->mayWriteToMemory())
980  return None;
981 
982  if (auto *CB = dyn_cast<CallBase>(I))
983  return MemoryLocation::getForDest(CB, TLI);
984 
985     return MemoryLocation::getOrNone(I);
986   }
987 
988  /// Assuming this instruction has a dead analyzable write, can we delete
989  /// this instruction?
990  bool isRemovable(Instruction *I) {
991  assert(getLocForWrite(I) && "Must have analyzable write");
992 
993  // Don't remove volatile/atomic stores.
994  if (StoreInst *SI = dyn_cast<StoreInst>(I))
995  return SI->isUnordered();
996 
997  if (auto *CB = dyn_cast<CallBase>(I)) {
998  // Don't remove volatile memory intrinsics.
999  if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1000  return !MI->isVolatile();
1001 
1002  // Never remove dead lifetime intrinsics, e.g. because they are followed
1003  // by a free.
1004  if (CB->isLifetimeStartOrEnd())
1005  return false;
1006 
1007  return CB->use_empty() && CB->willReturn() && CB->doesNotThrow();
1008  }
1009 
1010  return false;
1011  }
1012 
1013  /// Returns true if \p UseInst completely overwrites \p DefLoc
1014  /// (stored by \p DefInst).
1015  bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1016  Instruction *UseInst) {
1017  // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1018  // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1019  // MemoryDef.
1020  if (!UseInst->mayWriteToMemory())
1021  return false;
1022 
1023  if (auto *CB = dyn_cast<CallBase>(UseInst))
1024  if (CB->onlyAccessesInaccessibleMemory())
1025  return false;
1026 
1027  int64_t InstWriteOffset, DepWriteOffset;
1028  if (auto CC = getLocForWrite(UseInst))
1029  return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1030  DepWriteOffset) == OW_Complete;
1031  return false;
1032  }
1033 
1034  /// Returns true if \p Def is not read before returning from the function.
1035  bool isWriteAtEndOfFunction(MemoryDef *Def) {
1036  LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1037  << *Def->getMemoryInst()
1038  << ") is at the end the function \n");
1039 
1040  auto MaybeLoc = getLocForWrite(Def->getMemoryInst());
1041  if (!MaybeLoc) {
1042  LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
1043  return false;
1044  }
1045 
1046     SmallVector<MemoryAccess *, 4> WorkList;
1047     SmallPtrSet<MemoryAccess *, 8> Visited;
1048  auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
1049  if (!Visited.insert(Acc).second)
1050  return;
1051  for (Use &U : Acc->uses())
1052  WorkList.push_back(cast<MemoryAccess>(U.getUser()));
1053  };
1054  PushMemUses(Def);
1055  for (unsigned I = 0; I < WorkList.size(); I++) {
1056  if (WorkList.size() >= MemorySSAScanLimit) {
1057  LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1058  return false;
1059  }
1060 
1061  MemoryAccess *UseAccess = WorkList[I];
1062  // Simply adding the users of MemoryPhi to the worklist is not enough,
1063  // because we might miss read clobbers in different iterations of a loop,
1064  // for example.
1065  // TODO: Add support for phi translation to handle the loop case.
1066  if (isa<MemoryPhi>(UseAccess))
1067  return false;
1068 
1069  // TODO: Checking for aliasing is expensive. Consider reducing the amount
1070  // of times this is called and/or caching it.
1071  Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1072  if (isReadClobber(*MaybeLoc, UseInst)) {
1073  LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1074  return false;
1075  }
1076 
1077  if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1078  PushMemUses(UseDef);
1079  }
1080  return true;
1081  }
1082 
1083  /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1084  /// pair with the MemoryLocation terminated by \p I and a boolean flag
1085  /// indicating whether \p I is a free-like call.
1086   Optional<std::pair<MemoryLocation, bool>>
1087   getLocForTerminator(Instruction *I) const {
1088  uint64_t Len;
1089  Value *Ptr;
1090  if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
1091  m_Value(Ptr))))
1092  return {std::make_pair(MemoryLocation(Ptr, Len), false)};
1093 
1094  if (auto *CB = dyn_cast<CallBase>(I)) {
1095  if (isFreeCall(I, &TLI))
1096  return {std::make_pair(MemoryLocation::getAfter(CB->getArgOperand(0)),
1097  true)};
1098  }
1099 
1100  return None;
1101  }
1102 
1103  /// Returns true if \p I is a memory terminator instruction like
1104  /// llvm.lifetime.end or free.
1105  bool isMemTerminatorInst(Instruction *I) const {
1106  IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
1107  return (II && II->getIntrinsicID() == Intrinsic::lifetime_end) ||
1108  isFreeCall(I, &TLI);
1109  }
1110 
1111  /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1112  /// instruction \p AccessI.
1113  bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1114  Instruction *MaybeTerm) {
1115     Optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1116         getLocForTerminator(MaybeTerm);
1117 
1118  if (!MaybeTermLoc)
1119  return false;
1120 
1121  // If the terminator is a free-like call, all accesses to the underlying
1122  // object can be considered terminated.
1123  if (getUnderlyingObject(Loc.Ptr) !=
1124  getUnderlyingObject(MaybeTermLoc->first.Ptr))
1125  return false;
1126 
1127  auto TermLoc = MaybeTermLoc->first;
1128  if (MaybeTermLoc->second) {
1129  const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1130  return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1131  }
1132  int64_t InstWriteOffset = 0;
1133  int64_t DepWriteOffset = 0;
1134  return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1135  DepWriteOffset) == OW_Complete;
1136  }
1137 
1138  // Returns true if \p Use may read from \p DefLoc.
1139  bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1140  if (isNoopIntrinsic(UseInst))
1141  return false;
1142 
1143  // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1144  // treated as read clobber.
1145  if (auto SI = dyn_cast<StoreInst>(UseInst))
1146  return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1147 
1148  if (!UseInst->mayReadFromMemory())
1149  return false;
1150 
1151  if (auto *CB = dyn_cast<CallBase>(UseInst))
1152  if (CB->onlyAccessesInaccessibleMemory())
1153  return false;
1154 
1155  return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1156  }
1157 
1158  /// Returns true if a dependency between \p Current and \p KillingDef is
1159  /// guaranteed to be loop invariant for the loops that they are in. Either
1160  /// because they are known to be in the same block, in the same loop level or
1161  /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1162  /// during execution of the containing function.
1163  bool isGuaranteedLoopIndependent(const Instruction *Current,
1164  const Instruction *KillingDef,
1165  const MemoryLocation &CurrentLoc) {
1166  // If the dependency is within the same block or loop level (being careful
1167  // of irreducible loops), we know that AA will return a valid result for the
1168  // memory dependency. (Both at the function level, outside of any loop,
1169  // would also be valid but we currently disable that to limit compile time).
1170  if (Current->getParent() == KillingDef->getParent())
1171  return true;
1172  const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
1173  if (!ContainsIrreducibleLoops && CurrentLI &&
1174  CurrentLI == LI.getLoopFor(KillingDef->getParent()))
1175  return true;
1176  // Otherwise check the memory location is invariant to any loops.
1177  return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1178  }
1179 
1180  /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1181  /// loop. In particular, this guarantees that it only references a single
1182  /// MemoryLocation during execution of the containing function.
1183  bool isGuaranteedLoopInvariant(const Value *Ptr) {
1184  Ptr = Ptr->stripPointerCasts();
1185  if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1186  if (GEP->hasAllConstantIndices())
1187  Ptr = GEP->getPointerOperand()->stripPointerCasts();
1188 
1189  if (auto *I = dyn_cast<Instruction>(Ptr))
1190  return I->getParent()->isEntryBlock();
1191  return true;
1192  }
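
  // Examples (editor's illustration): a pointer produced by an alloca, or by a
  // GEP with all-constant indices on such a pointer, in the entry block always
  // names the same location and is treated as loop invariant; a GEP computed
  // inside a loop from a loop-varying index is not, since it may address a
  // different location on every iteration.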
1193 
1194  // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1195  // with no read access between them or on any other path to a function exit
1196  // block if \p KillingLoc is not accessible after the function returns. If
1197  // there is no such MemoryDef, return None. The returned value may not
1198  // (completely) overwrite \p KillingLoc. Currently we bail out when we
1199  // encounter an aliasing MemoryUse (read).
1200   Optional<MemoryAccess *>
1201   getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1202  const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1203  unsigned &ScanLimit, unsigned &WalkerStepLimit,
1204  bool IsMemTerm, unsigned &PartialLimit) {
1205  if (ScanLimit == 0 || WalkerStepLimit == 0) {
1206  LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1207  return None;
1208  }
1209 
1210  MemoryAccess *Current = StartAccess;
1211  Instruction *KillingI = KillingDef->getMemoryInst();
1212  LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1213 
1214  // Only optimize defining access of KillingDef when directly starting at its
1215  // defining access. The defining access also must only access KillingLoc. At
1216  // the moment we only support instructions with a single write location, so
1217  // it should be sufficient to disable optimizations for instructions that
1218  // also read from memory.
1219  bool CanOptimize = OptimizeMemorySSA &&
1220  KillingDef->getDefiningAccess() == StartAccess &&
1221  !KillingI->mayReadFromMemory();
1222 
1223  // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1224  Optional<MemoryLocation> CurrentLoc;
1225  for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1226  LLVM_DEBUG({
1227  dbgs() << " visiting " << *Current;
1228  if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1229  dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1230  << ")";
1231  dbgs() << "\n";
1232  });
1233 
1234  // Reached TOP.
1235  if (MSSA.isLiveOnEntryDef(Current)) {
1236  LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1237  return None;
1238  }
1239 
1240  // Cost of a step. Accesses in the same block are more likely to be valid
1241  // candidates for elimination, hence consider them cheaper.
1242  unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1243                               ? MemorySSASameBBStepCost
1244                               : MemorySSAOtherBBStepCost;
1245       if (WalkerStepLimit <= StepCost) {
1246  LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1247  return None;
1248  }
1249  WalkerStepLimit -= StepCost;
1250 
1251  // Return for MemoryPhis. They cannot be eliminated directly and the
1252  // caller is responsible for traversing them.
1253  if (isa<MemoryPhi>(Current)) {
1254  LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1255  return Current;
1256  }
1257 
1258  // Below, check if CurrentDef is a valid candidate to be eliminated by
1259  // KillingDef. If it is not, check the next candidate.
1260  MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1261  Instruction *CurrentI = CurrentDef->getMemoryInst();
1262 
1263  if (canSkipDef(CurrentDef,
1264  !isInvisibleToCallerBeforeRet(KillingUndObj))) {
1265  CanOptimize = false;
1266  continue;
1267  }
1268 
1269  // Before we try to remove anything, check for any extra throwing
1270  // instructions that block us from DSEing
1271  if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1272  LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1273  return None;
1274  }
1275 
1276  // Check for anything that looks like it will be a barrier to further
1277  // removal
1278  if (isDSEBarrier(KillingUndObj, CurrentI)) {
1279  LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1280  return None;
1281  }
1282 
1283       // If Current is known to be on a path that reads DefLoc or is a read
1284  // clobber, bail out, as the path is not profitable. We skip this check
1285  // for intrinsic calls, because the code knows how to handle memcpy
1286  // intrinsics.
1287  if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1288  return None;
1289 
1290  // Quick check if there are direct uses that are read-clobbers.
1291  if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1292  if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1293  return !MSSA.dominates(StartAccess, UseOrDef) &&
1294  isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1295  return false;
1296  })) {
1297  LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1298  return None;
1299  }
1300 
1301  // If Current does not have an analyzable write location or is not
1302  // removable, skip it.
1303  CurrentLoc = getLocForWrite(CurrentI);
1304  if (!CurrentLoc || !isRemovable(CurrentI)) {
1305  CanOptimize = false;
1306  continue;
1307  }
1308 
1309  // AliasAnalysis does not account for loops. Limit elimination to
1310  // candidates for which we can guarantee they always store to the same
1311       // memory location and are not located in different loops.
1312  if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1313  LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1314  WalkerStepLimit -= 1;
1315  CanOptimize = false;
1316  continue;
1317  }
1318 
1319  if (IsMemTerm) {
1320  // If the killing def is a memory terminator (e.g. lifetime.end), check
1321         // the next candidate if Current does not write the same
1322  // underlying object as the terminator.
1323  if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1324  CanOptimize = false;
1325  continue;
1326  }
1327  } else {
1328  int64_t KillingOffset = 0;
1329  int64_t DeadOffset = 0;
1330  auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1331  KillingOffset, DeadOffset);
1332  if (CanOptimize) {
1333  // CurrentDef is the earliest write clobber of KillingDef. Use it as
1334  // optimized access. Do not optimize if CurrentDef is already the
1335  // defining access of KillingDef.
1336  if (CurrentDef != KillingDef->getDefiningAccess() &&
1337  (OR == OW_Complete || OR == OW_MaybePartial))
1338  KillingDef->setOptimized(CurrentDef);
1339 
1340  // Once a may-aliasing def is encountered do not set an optimized
1341  // access.
1342  if (OR != OW_None)
1343  CanOptimize = false;
1344  }
1345 
1346  // If Current does not write to the same object as KillingDef, check
1347  // the next candidate.
1348  if (OR == OW_Unknown || OR == OW_None)
1349  continue;
1350  else if (OR == OW_MaybePartial) {
1351  // If KillingDef only partially overwrites Current, check the next
1352  // candidate if the partial step limit is exceeded. This aggressively
1353  // limits the number of candidates for partial store elimination,
1354  // which are less likely to be removable in the end.
1355  if (PartialLimit <= 1) {
1356  WalkerStepLimit -= 1;
1357  LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
1358  continue;
1359  }
1360  PartialLimit -= 1;
1361  }
1362  }
1363  break;
1364  };
1365 
1366  // Accesses to objects accessible after the function returns can only be
1367  // eliminated if the access is dead along all paths to the exit. Collect
1368  // the blocks with killing (=completely overwriting MemoryDefs) and check if
1369  // they cover all paths from MaybeDeadAccess to any function exit.
1370  SmallPtrSet<Instruction *, 16> KillingDefs;
1371  KillingDefs.insert(KillingDef->getMemoryInst());
1372  MemoryAccess *MaybeDeadAccess = Current;
1373  MemoryLocation MaybeDeadLoc = *CurrentLoc;
1374  Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1375  LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1376  << *MaybeDeadI << ")\n");
1377 
1378     SmallSetVector<MemoryAccess *, 8> WorkList;
1379     auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
1380  for (Use &U : Acc->uses())
1381  WorkList.insert(cast<MemoryAccess>(U.getUser()));
1382  };
1383  PushMemUses(MaybeDeadAccess);
1384 
1385  // Check if DeadDef may be read.
1386  for (unsigned I = 0; I < WorkList.size(); I++) {
1387  MemoryAccess *UseAccess = WorkList[I];
1388 
1389  LLVM_DEBUG(dbgs() << " " << *UseAccess);
1390  // Bail out if the number of accesses to check exceeds the scan limit.
1391  if (ScanLimit < (WorkList.size() - I)) {
1392  LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1393  return None;
1394  }
1395  --ScanLimit;
1396  NumDomMemDefChecks++;
1397 
1398  if (isa<MemoryPhi>(UseAccess)) {
1399  if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1400  return DT.properlyDominates(KI->getParent(),
1401  UseAccess->getBlock());
1402  })) {
1403  LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1404  continue;
1405  }
1406  LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1407  PushMemUses(UseAccess);
1408  continue;
1409  }
1410 
1411  Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1412  LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1413 
1414  if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1415  return DT.dominates(KI, UseInst);
1416  })) {
1417  LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1418  continue;
1419  }
1420 
1421       // A memory terminator kills all preceding MemoryDefs and all succeeding
1422       // MemoryAccesses. We do not have to check its users.
1423  if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1424  LLVM_DEBUG(
1425  dbgs()
1426  << " ... skipping, memterminator invalidates following accesses\n");
1427  continue;
1428  }
1429 
1430  if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1431  LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1432  PushMemUses(UseAccess);
1433  continue;
1434  }
1435 
1436  if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj)) {
1437  LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1438  return None;
1439  }
1440 
1441  // Uses which may read the original MemoryDef mean we cannot eliminate the
1442  // original MD. Stop walk.
1443  if (isReadClobber(MaybeDeadLoc, UseInst)) {
1444  LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1445  return None;
1446  }
1447 
1448  // If this worklist walks back to the original memory access (and the
1449  // pointer is not guaranteed loop invariant) then we cannot assume that a
1450  // store kills itself.
1451  if (MaybeDeadAccess == UseAccess &&
1452  !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1453  LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1454  return None;
1455  }
1456  // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1457  // if it reads the memory location.
1458  // TODO: It would probably be better to check for self-reads before
1459  // calling the function.
1460  if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1461  LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1462  continue;
1463  }
1464 
1465  // Check all uses for MemoryDefs, except for defs completely overwriting
1466  // the original location. Otherwise we have to check uses of *all*
1467  // MemoryDefs we discover, including non-aliasing ones. If we do not, we might
1468  // miss cases like the following
1469  // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1470  // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1471  // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1472  // (The Use points to the *first* Def it may alias)
1473  // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1474  // stores [0,1]
1475  if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1476  if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1477  BasicBlock *MaybeKillingBlock = UseInst->getParent();
1478  if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1479  PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1480  if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1481  LLVM_DEBUG(dbgs()
1482  << " ... found killing def " << *UseInst << "\n");
1483  KillingDefs.insert(UseInst);
1484  }
1485  } else {
1486  LLVM_DEBUG(dbgs()
1487  << " ... found preceding def " << *UseInst << "\n");
1488  return None;
1489  }
1490  } else
1491  PushMemUses(UseDef);
1492  }
1493  }
1494 
1495  // For accesses to locations visible after the function returns, make sure
1496  // that the location is dead (=overwritten) along all paths from
1497  // MaybeDeadAccess to the exit.
1498  if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1499  SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1500  for (Instruction *KD : KillingDefs)
1501  KillingBlocks.insert(KD->getParent());
1502  assert(!KillingBlocks.empty() &&
1503  "Expected at least a single killing block");
1504 
1505  // Find the common post-dominator of all killing blocks.
1506  BasicBlock *CommonPred = *KillingBlocks.begin();
1507  for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1508  if (!CommonPred)
1509  break;
1510  CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1511  }
1512 
1513  // If CommonPred is in the set of killing blocks, just check if it
1514  // post-dominates MaybeDeadAccess.
1515  if (KillingBlocks.count(CommonPred)) {
1516  if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock()))
1517  return {MaybeDeadAccess};
1518  return None;
1519  }
1520 
1521  // If the common post-dominator does not post-dominate MaybeDeadAccess,
1522  // there is a path from MaybeDeadAccess to an exit not going through a
1523  // killing block.
1524  if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1525  SetVector<BasicBlock *> WorkList;
1526 
1527  // If CommonPred is null, there are multiple exits from the function.
1528  // They all have to be added to the worklist.
1529  if (CommonPred)
1530  WorkList.insert(CommonPred);
1531  else
1532  for (BasicBlock *R : PDT.roots())
1533  WorkList.insert(R);
1534 
1535  NumCFGTries++;
1536  // Check if all paths starting from an exit node go through one of the
1537  // killing blocks before reaching MaybeDeadAccess.
1538  for (unsigned I = 0; I < WorkList.size(); I++) {
1539  NumCFGChecks++;
1540  BasicBlock *Current = WorkList[I];
1541  if (KillingBlocks.count(Current))
1542  continue;
1543  if (Current == MaybeDeadAccess->getBlock())
1544  return None;
1545 
1546  // MaybeDeadAccess is reachable from the entry, so we don't have to
1547  // explore unreachable blocks further.
1548  if (!DT.isReachableFromEntry(Current))
1549  continue;
1550 
1551  for (BasicBlock *Pred : predecessors(Current))
1552  WorkList.insert(Pred);
1553 
1554  if (WorkList.size() >= MemorySSAPathCheckLimit)
1555  return None;
1556  }
1557  NumCFGSuccess++;
1558  return {MaybeDeadAccess};
1559  }
1560  return None;
1561  }
1562 
1563  // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1564  // potentially dead.
1565  return {MaybeDeadAccess};
1566  }
1567 
1568  // Delete dead memory defs
1569  void deleteDeadInstruction(Instruction *SI) {
1570  MemorySSAUpdater Updater(&MSSA);
1571  SmallVector<Instruction *, 32> NowDeadInsts;
1572  NowDeadInsts.push_back(SI);
1573  --NumFastOther;
1574 
1575  while (!NowDeadInsts.empty()) {
1576  Instruction *DeadInst = NowDeadInsts.pop_back_val();
1577  ++NumFastOther;
1578 
1579  // Try to preserve debug information attached to the dead instruction.
1580  salvageDebugInfo(*DeadInst);
1581  salvageKnowledge(DeadInst);
1582 
1583  // Remove the Instruction from MSSA.
1584  if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
1585  if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
1586  SkipStores.insert(MD);
1587  }
1588 
1589  Updater.removeMemoryAccess(MA);
1590  }
1591 
1592  auto I = IOLs.find(DeadInst->getParent());
1593  if (I != IOLs.end())
1594  I->second.erase(DeadInst);
1595  // Remove its operands
1596  for (Use &O : DeadInst->operands())
1597  if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1598  O = nullptr;
1599  if (isInstructionTriviallyDead(OpI, &TLI))
1600  NowDeadInsts.push_back(OpI);
1601  }
1602 
1603  EI.removeInstruction(DeadInst);
1604  DeadInst->eraseFromParent();
1605  }
1606  }
1607 
1608  // Check for any extra throws between \p KillingI and \p DeadI that block
1609  // DSE. This only checks extra maythrows (those that aren't MemoryDefs).
1610  // MemoryDefs that may throw are handled during the walk from one def to the
1611  // next.
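  // For illustration, a minimal (hypothetical) IR sketch: a potentially
  // throwing call between the two stores blocks DSE when the stored-to object
  // is visible to the caller, because unwinding could expose the old value.
  //
  //   store i32 0, i32* @g        ; cannot be eliminated
  //   call void @may_throw()      ; extra maythrow between the defs
  //   store i32 1, i32* @g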
1612  bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1613  const Value *KillingUndObj) {
1614  // First see if we can ignore it by using the fact that KillingUndObj is an
1615  // alloca/alloca-like object that is not visible to the caller during
1616  // execution of the function.
1617  if (KillingUndObj && isInvisibleToCallerBeforeRet(KillingUndObj))
1618  return false;
1619 
1620  if (KillingI->getParent() == DeadI->getParent())
1621  return ThrowingBlocks.count(KillingI->getParent());
1622  return !ThrowingBlocks.empty();
1623  }
1624 
1625  // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1626  // instructions act as barriers:
1627  // * A memory instruction that may throw and \p KillingI accesses a non-stack
1628  // object.
1629  // * Atomic stores stronger than monotonic.
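  // For illustration, a minimal (hypothetical) IR sketch: the release store
  // below is stronger than monotonic, so it is treated as a barrier and the
  // walk does not eliminate stores across it.
  //
  //   store i32 0, i32* %p
  //   store atomic i32 1, i32* %q release, align 4   ; DSE barrier
  //   store i32 1, i32* %p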
1630  bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
1631  // If DeadI may throw it acts as a barrier, unless we are accessing an
1632  // alloca/alloca-like object that does not escape.
1633  if (DeadI->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj))
1634  return true;
1635 
1636  // If DeadI is an atomic load/store stronger than monotonic, do not try to
1637  // eliminate/reorder it.
1638  if (DeadI->isAtomic()) {
1639  if (auto *LI = dyn_cast<LoadInst>(DeadI))
1640  return isStrongerThanMonotonic(LI->getOrdering());
1641  if (auto *SI = dyn_cast<StoreInst>(DeadI))
1642  return isStrongerThanMonotonic(SI->getOrdering());
1643  if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
1644  return isStrongerThanMonotonic(ARMW->getOrdering());
1645  if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
1646  return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
1647  isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
1648  llvm_unreachable("other instructions should be skipped in MemorySSA");
1649  }
1650  return false;
1651  }
1652 
1653  /// Eliminate writes to objects that are not visible in the caller and are not
1654  /// accessed before returning from the function.
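  /// For illustration, a minimal (hypothetical) IR sketch: the store below is
  /// removable because %local never escapes and is not read before returning.
  ///
  ///   %local = alloca i32
  ///   store i32 42, i32* %local
  ///   ret void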
1655  bool eliminateDeadWritesAtEndOfFunction() {
1656  bool MadeChange = false;
1657  LLVM_DEBUG(
1658  dbgs()
1659  << "Trying to eliminate MemoryDefs at the end of the function\n");
1660  for (MemoryDef *Def : llvm::reverse(MemDefs)) {
1661  if (SkipStores.contains(Def))
1662  continue;
1663 
1664  Instruction *DefI = Def->getMemoryInst();
1665  auto DefLoc = getLocForWrite(DefI);
1666  if (!DefLoc || !isRemovable(DefI))
1667  continue;
1668 
1669  // NOTE: Currently eliminating writes at the end of a function is limited
1670  // to MemoryDefs with a single underlying object, to save compile-time. In
1671  // practice it appears the case with multiple underlying objects is very
1672  // uncommon. If it turns out to be important, we can use
1673  // getUnderlyingObjects here instead.
1674  const Value *UO = getUnderlyingObject(DefLoc->Ptr);
1675  if (!isInvisibleToCallerAfterRet(UO))
1676  continue;
1677 
1678  if (isWriteAtEndOfFunction(Def)) {
1679  // See through pointer-to-pointer bitcasts
1680  LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
1681  "of the function\n");
1682  deleteDeadInstruction(DefI);
1683  ++NumFastStores;
1684  MadeChange = true;
1685  }
1686  }
1687  return MadeChange;
1688  }
1689 
1690  /// If we have a zero initializing memset following a call to malloc,
1691  /// try folding it into a call to calloc.
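  /// For illustration, a minimal (hypothetical) IR sketch of the pattern:
  ///
  ///   %p = call i8* @malloc(i64 %n)
  ///   call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 %n, i1 false)
  ///
  /// which can be rewritten as
  ///
  ///   %p = call i8* @calloc(i64 1, i64 %n)
  ///
  /// with the memset removed.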
1692  bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
1693  Instruction *DefI = Def->getMemoryInst();
1694  MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1695  if (!MemSet)
1696  // TODO: Could handle zero store to small allocation as well.
1697  return false;
1698  Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1699  if (!StoredConstant || !StoredConstant->isNullValue())
1700  return false;
1701 
1702  if (!isRemovable(DefI))
1703  // The memset might be volatile.
1704  return false;
1705 
1706  if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
1707  F.hasFnAttribute(Attribute::SanitizeAddress) ||
1708  F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
1709  F.getName() == "calloc")
1710  return false;
1711  auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
1712  if (!Malloc)
1713  return false;
1714  auto *InnerCallee = Malloc->getCalledFunction();
1715  if (!InnerCallee)
1716  return false;
1717  LibFunc Func;
1718  if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
1719  Func != LibFunc_malloc)
1720  return false;
1721 
1722  auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
1723  // Check for a br(icmp(ptr, null), truebb, falsebb) pattern at the end
1724  // of the malloc block.
1725  auto *MallocBB = Malloc->getParent(),
1726  *MemsetBB = Memset->getParent();
1727  if (MallocBB == MemsetBB)
1728  return true;
1729  auto *Ptr = Memset->getArgOperand(0);
1730  auto *TI = MallocBB->getTerminator();
1731  ICmpInst::Predicate Pred;
1732  BasicBlock *TrueBB, *FalseBB;
1733  if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
1734  FalseBB)))
1735  return false;
1736  if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB)
1737  return false;
1738  return true;
1739  };
1740 
1741  if (Malloc->getOperand(0) != MemSet->getLength())
1742  return false;
1743  if (!shouldCreateCalloc(Malloc, MemSet) ||
1744  !DT.dominates(Malloc, MemSet) ||
1745  !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
1746  return false;
1747  IRBuilder<> IRB(Malloc);
1748  const auto &DL = Malloc->getModule()->getDataLayout();
1749  auto *Calloc =
1750  emitCalloc(ConstantInt::get(IRB.getIntPtrTy(DL), 1),
1751  Malloc->getArgOperand(0), IRB, TLI);
1752  if (!Calloc)
1753  return false;
1754  MemorySSAUpdater Updater(&MSSA);
1755  auto *LastDef =
1756  cast<MemoryDef>(Updater.getMemorySSA()->getMemoryAccess(Malloc));
1757  auto *NewAccess =
1758  Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), LastDef,
1759  LastDef);
1760  auto *NewAccessMD = cast<MemoryDef>(NewAccess);
1761  Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
1762  Updater.removeMemoryAccess(Malloc);
1763  Malloc->replaceAllUsesWith(Calloc);
1764  Malloc->eraseFromParent();
1765  return true;
1766  }
1767 
1768  /// \returns true if \p Def is a no-op store, either because it
1769  /// directly stores back a loaded value or stores zero to a calloced object.
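  /// For illustration, a minimal (hypothetical) IR sketch: the store writes
  /// back the value it just loaded, so it is a no-op as long as nothing
  /// clobbers %p in between.
  ///
  ///   %v = load i32, i32* %p
  ///   store i32 %v, i32* %p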
1770  bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
1771  Instruction *DefI = Def->getMemoryInst();
1772  StoreInst *Store = dyn_cast<StoreInst>(DefI);
1773  MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1774  Constant *StoredConstant = nullptr;
1775  if (Store)
1776  StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
1777  else if (MemSet)
1778  StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1779  else
1780  return false;
1781 
1782  if (!isRemovable(DefI))
1783  return false;
1784 
1785  if (StoredConstant && isAllocationFn(DefUO, &TLI)) {
1786  auto *CB = cast<CallBase>(DefUO);
1787  auto *InitC = getInitialValueOfAllocation(CB, &TLI,
1788  StoredConstant->getType());
1789  // If the clobbering access is LiveOnEntry, no instructions between the
1790  // allocation and the store can modify the memory location.
1791  if (InitC && InitC == StoredConstant)
1792  return MSSA.isLiveOnEntryDef(
1793  MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def));
1794  }
1795 
1796  if (!Store)
1797  return false;
1798 
1799  if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
1800  if (LoadI->getPointerOperand() == Store->getOperand(1)) {
1801  // Get the defining access for the load.
1802  auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
1803  // Fast path: the defining accesses are the same.
1804  if (LoadAccess == Def->getDefiningAccess())
1805  return true;
1806 
1807  // Look through phi accesses. Recursively scan all phi accesses by
1808  // adding them to a worklist. Bail when we run into a memory def that
1809  // does not match LoadAccess.
1810  SetVector<MemoryAccess *> ToCheck;
1811  MemoryAccess *Current =
1812  MSSA.getWalker()->getClobberingMemoryAccess(Def);
1813  // We don't want to bail when we run into the store memory def. But,
1814  // the phi access may point to it. So, pretend like we've already
1815  // checked it.
1816  ToCheck.insert(Def);
1817  ToCheck.insert(Current);
1818  // Start at current (1) to simulate already having checked Def.
1819  for (unsigned I = 1; I < ToCheck.size(); ++I) {
1820  Current = ToCheck[I];
1821  if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
1822  // Check all the operands.
1823  for (auto &Use : PhiAccess->incoming_values())
1824  ToCheck.insert(cast<MemoryAccess>(&Use));
1825  continue;
1826  }
1827 
1828  // If we found a memory def, bail. This happens when we have an
1829  // unrelated write in between an otherwise noop store.
1830  assert(isa<MemoryDef>(Current) &&
1831  "Only MemoryDefs should reach here.");
1832  // TODO: Skip no alias MemoryDefs that have no aliasing reads.
1833  // We are searching for the definition of the store's destination.
1834  // So, if that is the same definition as the load, then this is a
1835  // noop. Otherwise, fail.
1836  if (LoadAccess != Current)
1837  return false;
1838  }
1839  return true;
1840  }
1841  }
1842 
1843  return false;
1844  }
1845 
1846  bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
1847  bool Changed = false;
1848  for (auto OI : IOL) {
1849  Instruction *DeadI = OI.first;
1850  MemoryLocation Loc = *getLocForWrite(DeadI);
1851  assert(isRemovable(DeadI) && "Expect only removable instruction");
1852 
1853  const Value *Ptr = Loc.Ptr->stripPointerCasts();
1854  int64_t DeadStart = 0;
1855  uint64_t DeadSize = Loc.Size.getValue();
1856  GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
1857  OverlapIntervalsTy &IntervalMap = OI.second;
1858  Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
1859  if (IntervalMap.empty())
1860  continue;
1861  Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
1862  }
1863  return Changed;
1864  }
1865 
1866  /// Eliminates writes to locations where the value that is being written
1867  /// is already stored at the same location.
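  /// For illustration, a minimal (hypothetical) IR sketch: the store is
  /// redundant because the preceding memset already wrote the same byte
  /// pattern to a location that completely covers it.
  ///
  ///   call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 16, i1 false)
  ///   %gep = getelementptr i8, i8* %p, i64 4
  ///   %bc = bitcast i8* %gep to i32*
  ///   store i32 0, i32* %bc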
1868  bool eliminateRedundantStoresOfExistingValues() {
1869  bool MadeChange = false;
1870  LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
1871  "already existing value\n");
1872  for (auto *Def : MemDefs) {
1873  if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
1874  continue;
1875 
1876  Instruction *DefInst = Def->getMemoryInst();
1877  auto MaybeDefLoc = getLocForWrite(DefInst);
1878  if (!MaybeDefLoc || !isRemovable(DefInst))
1879  continue;
1880 
1881  MemoryDef *UpperDef;
1882  // To conserve compile-time, we avoid walking to the next clobbering def.
1883  // Instead, we just try to get the optimized access, if it exists. DSE
1884  // will try to optimize defs during the earlier traversal.
1885  if (Def->isOptimized())
1886  UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
1887  else
1888  UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
1889  if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
1890  continue;
1891 
1892  Instruction *UpperInst = UpperDef->getMemoryInst();
1893  auto IsRedundantStore = [&]() {
1894  if (DefInst->isIdenticalTo(UpperInst))
1895  return true;
1896  if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
1897  if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
1898  // MemSetInst must have a write location.
1899  MemoryLocation UpperLoc = *getLocForWrite(UpperInst);
1900  int64_t InstWriteOffset = 0;
1901  int64_t DepWriteOffset = 0;
1902  auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
1903  InstWriteOffset, DepWriteOffset);
1904  Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
1905  return StoredByte && StoredByte == MemSetI->getOperand(1) &&
1906  OR == OW_Complete;
1907  }
1908  }
1909  return false;
1910  };
1911 
1912  if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
1913  continue;
1914  LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
1915  << '\n');
1916  deleteDeadInstruction(DefInst);
1917  NumRedundantStores++;
1918  MadeChange = true;
1919  }
1920  return MadeChange;
1921  }
1922 };
1923 
1924 static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
1925  DominatorTree &DT, PostDominatorTree &PDT,
1926  const TargetLibraryInfo &TLI,
1927  const LoopInfo &LI) {
1928  bool MadeChange = false;
1929 
1930  DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
1931  // For each store:
1932  for (unsigned I = 0; I < State.MemDefs.size(); I++) {
1933  MemoryDef *KillingDef = State.MemDefs[I];
1934  if (State.SkipStores.count(KillingDef))
1935  continue;
1936  Instruction *KillingI = KillingDef->getMemoryInst();
1937 
1938  Optional<MemoryLocation> MaybeKillingLoc;
1939  if (State.isMemTerminatorInst(KillingI))
1940  MaybeKillingLoc = State.getLocForTerminator(KillingI).map(
1941  [](const std::pair<MemoryLocation, bool> &P) { return P.first; });
1942  else
1943  MaybeKillingLoc = State.getLocForWrite(KillingI);
1944 
1945  if (!MaybeKillingLoc) {
1946  LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
1947  << *KillingI << "\n");
1948  continue;
1949  }
1950  MemoryLocation KillingLoc = *MaybeKillingLoc;
1951  assert(KillingLoc.Ptr && "KillingLoc should not be null");
1952  const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
1953  LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
1954  << *KillingDef << " (" << *KillingI << ")\n");
1955 
1956  unsigned ScanLimit = MemorySSAScanLimit;
1957  unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
1958  unsigned PartialLimit = MemorySSAPartialStoreLimit;
1959  // Worklist of MemoryAccesses that may be killed by KillingDef.
1960  SetVector<MemoryAccess *> ToCheck;
1961  ToCheck.insert(KillingDef->getDefiningAccess());
1962 
1963  bool Shortend = false;
1964  bool IsMemTerm = State.isMemTerminatorInst(KillingI);
1965  // Check if MemoryAccesses in the worklist are killed by KillingDef.
1966  for (unsigned I = 0; I < ToCheck.size(); I++) {
1967  MemoryAccess *Current = ToCheck[I];
1968  if (State.SkipStores.count(Current))
1969  continue;
1970 
1971  Optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
1972  KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
1973  WalkerStepLimit, IsMemTerm, PartialLimit);
1974 
1975  if (!MaybeDeadAccess) {
1976  LLVM_DEBUG(dbgs() << " finished walk\n");
1977  continue;
1978  }
1979 
1980  MemoryAccess *DeadAccess = *MaybeDeadAccess;
1981  LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
1982  if (isa<MemoryPhi>(DeadAccess)) {
1983  LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
1984  for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
1985  MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
1986  BasicBlock *IncomingBlock = IncomingAccess->getBlock();
1987  BasicBlock *PhiBlock = DeadAccess->getBlock();
1988 
1989  // We only consider incoming MemoryAccesses that come before the
1990  // MemoryPhi. Otherwise we could discover candidates that do not
1991  // strictly dominate our starting def.
1992  if (State.PostOrderNumbers[IncomingBlock] >
1993  State.PostOrderNumbers[PhiBlock])
1994  ToCheck.insert(IncomingAccess);
1995  }
1996  continue;
1997  }
1998  auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
1999  Instruction *DeadI = DeadDefAccess->getMemoryInst();
2000  LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
2001  ToCheck.insert(DeadDefAccess->getDefiningAccess());
2002  NumGetDomMemoryDefPassed++;
2003 
2004  if (!DebugCounter::shouldExecute(MemorySSACounter))
2005  continue;
2006 
2007  MemoryLocation DeadLoc = *State.getLocForWrite(DeadI);
2008 
2009  if (IsMemTerm) {
2010  const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
2011  if (KillingUndObj != DeadUndObj)
2012  continue;
2013  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2014  << "\n KILLER: " << *KillingI << '\n');
2015  State.deleteDeadInstruction(DeadI);
2016  ++NumFastStores;
2017  MadeChange = true;
2018  } else {
2019  // Check if KillingI overwrites DeadI.
2020  int64_t KillingOffset = 0;
2021  int64_t DeadOffset = 0;
2022  OverwriteResult OR = State.isOverwrite(
2023  KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
2024  if (OR == OW_MaybePartial) {
2025  auto Iter = State.IOLs.insert(
2026  std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
2027  DeadI->getParent(), InstOverlapIntervalsTy()));
2028  auto &IOL = Iter.first->second;
2029  OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
2030  DeadOffset, DeadI, IOL);
2031  }
2032 
2033  if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2034  auto *DeadSI = dyn_cast<StoreInst>(DeadI);
2035  auto *KillingSI = dyn_cast<StoreInst>(KillingI);
2036  // We are re-using tryToMergePartialOverlappingStores, which requires
2037  // DeadSI to dominate KillingSI.
2038  // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
2039  if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
2040  if (Constant *Merged = tryToMergePartialOverlappingStores(
2041  KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
2042  State.BatchAA, &DT)) {
2043 
2044  // Update stored value of earlier store to merged constant.
2045  DeadSI->setOperand(0, Merged);
2046  ++NumModifiedStores;
2047  MadeChange = true;
2048 
2049  Shortend = true;
2050  // Remove killing store and remove any outstanding overlap
2051  // intervals for the updated store.
2052  State.deleteDeadInstruction(KillingSI);
2053  auto I = State.IOLs.find(DeadSI->getParent());
2054  if (I != State.IOLs.end())
2055  I->second.erase(DeadSI);
2056  break;
2057  }
2058  }
2059  }
2060 
2061  if (OR == OW_Complete) {
2062  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2063  << "\n KILLER: " << *KillingI << '\n');
2064  State.deleteDeadInstruction(DeadI);
2065  ++NumFastStores;
2066  MadeChange = true;
2067  }
2068  }
2069  }
2070 
2071  // Check if the store is a no-op.
2072  if (!Shortend && State.storeIsNoop(KillingDef, KillingUndObj)) {
2073  LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
2074  << '\n');
2075  State.deleteDeadInstruction(KillingI);
2076  NumRedundantStores++;
2077  MadeChange = true;
2078  continue;
2079  }
2080 
2081  // Can we form a calloc from a memset/malloc pair?
2082  if (!Shortend && State.tryFoldIntoCalloc(KillingDef, KillingUndObj)) {
2083  LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2084  << " DEAD: " << *KillingI << '\n');
2085  State.deleteDeadInstruction(KillingI);
2086  MadeChange = true;
2087  continue;
2088  }
2089  }
2090 
2091  if (EnablePartialOverwriteTracking)
2092  for (auto &KV : State.IOLs)
2093  MadeChange |= State.removePartiallyOverlappedStores(KV.second);
2094 
2095  MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2096  MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2097  return MadeChange;
2098 }
2099 } // end anonymous namespace
2100 
2101 //===----------------------------------------------------------------------===//
2102 // DSE Pass
2103 //===----------------------------------------------------------------------===//
2104 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2105  AliasAnalysis &AA = AM.getResult<AAManager>(F);
2106  const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2107  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
2108  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2109  PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
2110  LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
2111 
2112  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2113 
2114 #ifdef LLVM_ENABLE_STATS
2115  if (AreStatisticsEnabled())
2116  for (auto &I : instructions(F))
2117  NumRemainingStores += isa<StoreInst>(&I);
2118 #endif
2119 
2120  if (!Changed)
2121  return PreservedAnalyses::all();
2122 
2123  PreservedAnalyses PA;
2124  PA.preserveSet<CFGAnalyses>();
2125  PA.preserve<MemorySSAAnalysis>();
2126  PA.preserve<LoopAnalysis>();
2127  return PA;
2128 }
2129 
2130 namespace {
2131 
2132 /// A legacy pass for the legacy pass manager that wraps \c DSEPass.
2133 class DSELegacyPass : public FunctionPass {
2134 public:
2135  static char ID; // Pass identification, replacement for typeid
2136 
2137  DSELegacyPass() : FunctionPass(ID) {
2138  initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
2139  }
2140 
2141  bool runOnFunction(Function &F) override {
2142  if (skipFunction(F))
2143  return false;
2144 
2145  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2146  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2147  const TargetLibraryInfo &TLI =
2148  getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2149  MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
2150  PostDominatorTree &PDT =
2151  getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
2152  LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2153 
2154  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2155 
2156 #ifdef LLVM_ENABLE_STATS
2157  if (AreStatisticsEnabled())
2158  for (auto &I : instructions(F))
2159  NumRemainingStores += isa<StoreInst>(&I);
2160 #endif
2161 
2162  return Changed;
2163  }
2164 
2165  void getAnalysisUsage(AnalysisUsage &AU) const override {
2166  AU.setPreservesCFG();
2167  AU.addRequired<AAResultsWrapperPass>();
2168  AU.addRequired<TargetLibraryInfoWrapperPass>();
2169  AU.addPreserved<GlobalsAAWrapperPass>();
2170  AU.addRequired<DominatorTreeWrapperPass>();
2171  AU.addPreserved<DominatorTreeWrapperPass>();
2172  AU.addRequired<PostDominatorTreeWrapperPass>();
2173  AU.addRequired<MemorySSAWrapperPass>();
2174  AU.addPreserved<PostDominatorTreeWrapperPass>();
2175  AU.addPreserved<MemorySSAWrapperPass>();
2176  AU.addRequired<LoopInfoWrapperPass>();
2177  AU.addPreserved<LoopInfoWrapperPass>();
2178  }
2179 };
2180 
2181 } // end anonymous namespace
2182 
2183 char DSELegacyPass::ID = 0;
2184 
2185 INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
2186  false)
2187 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2188 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
2189 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2190 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
2191 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
2192 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
2193 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2194 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2195 INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
2196  false)
2197 
2198 FunctionPass *llvm::createDeadStoreEliminationPass() {
2199  return new DSELegacyPass();
2200 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
dse
dse
Definition: DeadStoreElimination.cpp:2195
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::BatchAAResults
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
Definition: AliasAnalysis.h:947
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1287
getPointerSize
static uint64_t getPointerSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, const Function *F)
Definition: DeadStoreElimination.cpp:211
llvm::isAligned
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:138
llvm::EarliestEscapeInfo::removeInstruction
void removeInstruction(Instruction *I)
Definition: BasicAliasAnalysis.cpp:253
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
llvm::AliasResult::PartialAlias
@ PartialAlias
The two locations alias, but only due to a partial overlap.
Definition: AliasAnalysis.h:104
MathExtras.h
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:37
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:468
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:742
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:321
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:633
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:293
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:721
llvm::User::operands
op_range operands()
Definition: User.h:242
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::PHITransAddr
PHITransAddr - An address value which tracks and handles phi translation.
Definition: PHITransAddr.h:35
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
IntrinsicInst.h
llvm::RegState::Dead
@ Dead
Unused definition.
Definition: MachineInstrBuilder.h:50
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:783
llvm::MemoryLocation::Ptr
const Value * Ptr
The address of the start of the location.
Definition: MemoryLocation.h:217
Scalar.h
InstIterator.h
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:52
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::emitCalloc
Value * emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, const TargetLibraryInfo &TLI)
Emit a call to the calloc function.
Definition: BuildLibCalls.cpp:1657
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1177
Statistic.h
CaptureTracking.h
llvm::EarliestEscapeInfo
Context-sensitive CaptureInfo provider, which computes and caches the earliest common dominator closu...
Definition: AliasAnalysis.h:402
ErrorHandling.h
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:734
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:707
llvm::IRBuilder<>
MapVector.h
DeadStoreElimination.h
ValueTracking.h
Local.h
llvm::getInitialValueOfAllocation
Constant * getInitialValueOfAllocation(const CallBase *Alloc, const TargetLibraryInfo *TLI, Type *Ty)
If this allocation function initializes memory to a fixed value, return said value in the requested t...
Definition: MemoryBuiltins.cpp:382
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
GlobalsModRef.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::PatternMatch::m_Br
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
Definition: PatternMatch.h:1741
llvm::APInt::getBitsSet
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:241
APInt.h
MemorySSASameBBStepCost
static cl::opt< unsigned > MemorySSASameBBStepCost("dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden, cl::desc("The cost of a step in the same basic block as the killing MemoryDef" "(default = 1)"))
llvm::MemoryLocation::Size
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
Definition: MemoryLocation.h:226
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1412
Module.h
MemoryBuiltins.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:414
llvm::ObjectSizeOpts::NullIsUnknownSize
bool NullIsUnknownSize
If this is true, null pointers in address space 0 will be treated as though they can't be evaluated.
Definition: MemoryBuiltins.h:145
llvm::isBytewiseValue
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
Definition: ValueTracking.cpp:3746
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1271
llvm::createDeadStoreEliminationPass
FunctionPass * createDeadStoreEliminationPass()
Definition: DeadStoreElimination.cpp:2198
llvm::Optional
Definition: APInt.h:33
llvm::IntervalMap::empty
bool empty() const
empty - Return true when no intervals are mapped.
Definition: IntervalMap.h:1055
llvm::PHITransAddr::IsPotentiallyPHITranslatable
bool IsPotentiallyPHITranslatable() const
IsPotentiallyPHITranslatable - If this needs PHI translation, return true if we have some hope of doi...
Definition: PHITransAddr.cpp:114
llvm::AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:81
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::Instruction::mayThrow
bool mayThrow() const
Return true if this instruction may throw an exception.
Definition: Instruction.cpp:683
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:644
tryToShortenBegin
static bool tryToShortenBegin(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
Definition: DeadStoreElimination.cpp:603
llvm::LocationSize::isPrecise
bool isPrecise() const
Definition: MemoryLocation.h:165
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::BatchAAResults::getModRefInfo
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
Definition: AliasAnalysis.h:962
MustExecute.h
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::MemSetBase::getValue
Value * getValue() const
Definition: IntrinsicInst.h:818
llvm::AtomicOrdering::Monotonic
@ Monotonic
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
AliasAnalysis.h
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::isAllocationFn
bool isAllocationFn(const Value *V, const TargetLibraryInfo *TLI)
Tests if a value is a call or invoke to a library function that allocates or reallocates memory (eith...
Definition: MemoryBuiltins.cpp:229
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:115
Instruction.h
CommandLine.h
MemorySSAScanLimit
static cl::opt< unsigned > MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 150)"))
llvm::StoreInst::getValueOperand
Value * getValueOperand()
Definition: Instructions.h:403
llvm::AreStatisticsEnabled
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:138
llvm::isNoAliasCall
bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
Definition: AliasAnalysis.cpp:963
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:980
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AAResults
Definition: AliasAnalysis.h:507
PostDominators.h
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:74
llvm::MemorySSA::isLiveOnEntryDef
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:741
OverlapIntervalsTy
std::map< int64_t, int64_t > OverlapIntervalsTy
Definition: DeadStoreElimination.cpp:175
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:34
Intrinsics.h
InstrTypes.h
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1398
llvm::PostDominatorTreeWrapperPass
Definition: PostDominators.h:73
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LocationSize::getValue
uint64_t getValue() const
Definition: MemoryLocation.h:158
llvm::AliasResult::hasOffset
constexpr bool hasOffset() const
Definition: AliasAnalysis.h:117
TargetLibraryInfo.h
AssumeBundleBuilder.h
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:376
false
Definition: StackSlotColoring.cpp:142
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:291
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:145
llvm::Instruction
Definition: Instruction.h:45
llvm::DebugCounter::shouldExecute
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:74
Elimination
Dead Store Elimination
Definition: DeadStoreElimination.cpp:2195
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:32
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:932
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:73
llvm::getUnderlyingObject
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
Definition: ValueTracking.cpp:4280
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:214
SmallPtrSet.h
llvm::DominatorTreeBase::roots
iterator_range< root_iterator > roots()
Definition: GenericDomTree.h:304
PatternMatch.h
llvm::MemoryUseOrDef::getMemoryInst
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
Definition: MemorySSA.h:253
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::Instruction::mayWriteToMemory
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
Definition: Instruction.cpp:594
llvm::isModSet
LLVM_NODISCARD bool isModSet(const ModRefInfo MRI)
Definition: AliasAnalysis.h:196
llvm::getObjectSize
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
Definition: MemoryBuiltins.cpp:479
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false, false) INITIALIZE_PASS_END(DSELegacyPass
llvm::None
const NoneType None
Definition: None.h:23
llvm::MemoryAccess::getBlock
BasicBlock * getBlock() const
Definition: MemorySSA.h:158
memoryIsNotModifiedBetween
static bool memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, BatchAAResults &AA, const DataLayout &DL, DominatorTree *DT)
Returns true if the memory which is accessed by the second instruction is not modified between the fi...
Definition: DeadStoreElimination.cpp:390
isShortenableAtTheBeginning
static bool isShortenableAtTheBeginning(Instruction *I)
Returns true if the beginning of this instruction can be safely shortened in length.
Definition: DeadStoreElimination.cpp:205
llvm::MemSetInst
This class wraps the llvm.memset intrinsic.
Definition: IntrinsicInst.h:957
llvm::AliasResult::getOffset
constexpr int32_t getOffset() const
Definition: AliasAnalysis.h:118
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::MemorySSAWalker::getClobberingMemoryAccess
MemoryAccess * getClobberingMemoryAccess(const Instruction *I)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1040
BasicBlock.h
llvm::cl::opt< bool >
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:206
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:535
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:309
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::MemoryLocation::getOrNone
static Optional< MemoryLocation > getOrNone(const Instruction *Inst)
Definition: MemoryLocation.cpp:78
llvm::BatchAAResults::alias
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
Definition: AliasAnalysis.h:956
llvm::MapVector::find
iterator find(const KeyT &Key)
Definition: MapVector.h:148
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:465
uint64_t
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:55
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:79
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:176
llvm::Instruction::isIdenticalTo
bool isIdenticalTo(const Instruction *I) const
Return true if the specified instruction is exactly identical to the current one.
Definition: Instruction.cpp:500
MemoryLocation.h
llvm::DenseMap
Definition: DenseMap.h:714
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:704
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:970
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MemorySSA::getMemoryAccess
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:721
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:402
llvm::MemoryDef
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:376
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:367
EnablePartialStoreMerging
static cl::opt< bool > EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE"))
llvm::GetElementPtrInst::CreateInBounds
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Definition: Instructions.h:986
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:328
llvm::TargetLibraryInfo::has
bool has(LibFunc F) const
Tests whether a library function is available.
Definition: TargetLibraryInfo.h:325
MemorySSAOtherBBStepCost
static cl::opt< unsigned > MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5), cl::Hidden, cl::desc("The cost of a step in a different basic " "block than the killing MemoryDef" "(default = 5)"))
isMaskedStoreOverwrite
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, const Instruction *DeadI, BatchAAResults &AA)
Check if two instruction are masked stores that completely overwrite one another.
Definition: DeadStoreElimination.cpp:240
llvm::salvageKnowledge
void salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
Definition: AssumeBundleBuilder.cpp:292
llvm::PHITransAddr::PHITranslateValue
bool PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
PHITranslateValue - PHI translate the current address up the CFG from CurBB to Pred,...
Definition: PHITransAddr.cpp:313
EnablePartialOverwriteTracking
static cl::opt< bool > EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden, cl::desc("Enable partial-overwrite tracking in DSE"))
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:930
llvm::MemorySSA::getWalker
MemorySSAWalker * getWalker()
Definition: MemorySSA.cpp:1599
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::Optional::map
auto map(const Function &F) const LLVM_LVALUE_FUNCTION -> Optional< decltype(F(getValue()))>
Apply a function to the value if present; otherwise return None.
Definition: Optional.h:304
MemorySSAPathCheckLimit
static cl::opt< unsigned > MemorySSAPathCheckLimit("dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, cl::desc("The maximum number of blocks to check when trying to prove that " "all paths to an exit go through a killing block (default = 50)"))
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:398
llvm::LoopInfo
Definition: LoopInfo.h:1086
tryToMergePartialOverlappingStores
static Constant * tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, int64_t KillingOffset, int64_t DeadOffset, const DataLayout &DL, BatchAAResults &AA, DominatorTree *DT)
Definition: DeadStoreElimination.cpp:633
llvm::PointerMayBeCaptured
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
Definition: CaptureTracking.cpp:215
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::IntervalMap::end
const_iterator end() const
Definition: IntervalMap.h:1112
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1656
DataLayout.h
llvm::MemorySSA::getSkipSelfWalker
MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1614
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::PostDominatorTree
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Definition: PostDominators.h:28
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:116
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:870
uint32_t
llvm::AliasResult::NoAlias
@ NoAlias
The two locations do not alias at all.
Definition: AliasAnalysis.h:99
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::AliasResult::MustAlias
@ MustAlias
The two locations precisely alias each other.
Definition: AliasAnalysis.h:106
llvm::ObjectSizeOpts
Various options to control the behavior of getObjectSize.
Definition: MemoryBuiltins.h:125
llvm::DominatorTreeBase::properlyDominates
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Definition: GenericDomTree.h:392
llvm::Instruction::mayReadFromMemory
bool mayReadFromMemory() const
Return true if this instruction may read memory.
Definition: Instruction.cpp:574
llvm::MemoryAccess
Definition: MemorySSA.h:136
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:685
llvm::MapVector::end
iterator end()
Definition: MapVector.h:72
Argument.h
llvm::APInt::zext
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:952
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::Instruction::isAtomic
bool isAtomic() const
Return true if this instruction has an AtomicOrdering of unordered or higher.
Definition: Instruction.cpp:614
llvm::PHITransAddr::getAddr
Value * getAddr() const
Definition: PHITransAddr.h:59
Constant.h
llvm::BatchAAResults::isMustAlias
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
Definition: AliasAnalysis.h:981
tryToShorten
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd)
Definition: DeadStoreElimination.cpp:477
llvm::MemoryDependenceWrapperPass
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDepnedenceResults instance.
Definition: MemoryDependenceAnalysis.h:525
llvm::MemIntrinsicBase::getLength
Value * getLength() const
Definition: IntrinsicInst.h:695
llvm::MemoryUseOrDef::getDefiningAccess
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:256
llvm::MemoryDef::setOptimized
void setOptimized(MemoryAccess *MA)
Definition: MemorySSA.h:396
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::initializeDSELegacyPassPass
void initializeDSELegacyPassPass(PassRegistry &)
llvm::DSEPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Definition: DeadStoreElimination.cpp:2104
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
MemorySSADefsPerBlockLimit
static cl::opt< unsigned > MemorySSADefsPerBlockLimit("dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, cl::desc("The number of MemoryDefs we consider as candidates to eliminated " "other stores per basic block (default = 5000)"))
Casting.h
llvm::post_order
iterator_range< po_iterator< T > > post_order(const T &G)
Definition: PostOrderIterator.h:188
Function.h
OptimizeMemorySSA
static cl::opt< bool > OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden, cl::desc("Allow DSE to optimize memory accesses."))
DebugCounter.h
PassManager.h
llvm::salvageDebugInfo
void salvageDebugInfo(Instruction &I)
Assuming the instruction I is going to be deleted, attempt to salvage debug users of I by writing the...
Definition: Local.cpp:1745
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:221
llvm::GetPointerBaseWithConstantOffset
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
Definition: ValueTracking.h:278
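A minimal usage sketch; the wrapper name splitBaseAndOffset is hypothetical. The call strips casts and constant GEPs from Ptr, writes the accumulated constant byte offset into the int64_t out-parameter, and returns the remaining base pointer.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"

// Decompose Ptr into "base + constant byte offset" where possible.
static llvm::Value *splitBaseAndOffset(llvm::Value *Ptr, int64_t &OffsetBytes,
                                       const llvm::DataLayout &DL) {
  OffsetBytes = 0; // overwritten by the call below
  return llvm::GetPointerBaseWithConstantOffset(Ptr, OffsetBytes, DL);
}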
llvm::MemoryLocation::getWithNewPtr
MemoryLocation getWithNewPtr(const Value *NewPtr) const
Definition: MemoryLocation.h:293
MemorySSAPartialStoreLimit
static cl::opt< unsigned > MemorySSAPartialStoreLimit("dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, cl::desc("The maximum number of candidates that only partially overwrite the " "killing MemoryDef to consider" " (default = 5)"))
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
llvm::isStrongerThan
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice, which is based on C++'s definition.
Definition: AtomicOrdering.h:90
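A small illustration of the strict ordering this predicate implements, assuming only Support/AtomicOrdering.h.

#include "llvm/Support/AtomicOrdering.h"
#include <cassert>

void orderingExample() {
  using llvm::AtomicOrdering;
  // Release is strictly stronger than Monotonic ...
  assert(llvm::isStrongerThan(AtomicOrdering::Release, AtomicOrdering::Monotonic));
  // ... but no ordering is strictly stronger than itself.
  assert(!llvm::isStrongerThan(AtomicOrdering::Monotonic, AtomicOrdering::Monotonic));
}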
DEBUG_COUNTER
DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa", "Controls which MemoryDefs are eliminated.")
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
MemorySSA.h
Instructions.h
PostOrderIterator.h
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:191
SmallVector.h
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:780
llvm::isRefSet
LLVM_NODISCARD bool isRefSet(const ModRefInfo MRI)
Definition: AliasAnalysis.h:199
tryToShortenEnd
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
Definition: DeadStoreElimination.cpp:576
llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:1404
Dominators.h
llvm::MemoryLocation::getAfter
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying object.
Definition: MemoryLocation.h:269
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1343
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
llvm::IntervalMap::begin
const_iterator begin() const
Definition: IntervalMap.h:1100
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1335
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:143
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::isStrongerThanMonotonic
bool isStrongerThanMonotonic(AtomicOrdering AO)
Definition: AtomicOrdering.h:124
llvm::IntervalMap
Definition: IntervalMap.h:938
isPartialOverwrite
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, const MemoryLocation &DeadLoc, int64_t KillingOff, int64_t DeadOff, Instruction *DeadI, InstOverlapIntervalsTy &IOL)
Return 'OW_Complete' if a store to the 'KillingLoc' location completely overwrites a store to the 'De...
Definition: DeadStoreElimination.cpp:272
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1478
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::PostDominatorTreeAnalysis
Analysis pass which computes a PostDominatorTree.
Definition: PostDominators.h:47
deleteDeadInstruction
static void deleteDeadInstruction(Instruction *I)
Definition: LoopIdiomRecognize.cpp:348
llvm::CastInst::CreatePointerCast
static CastInst * CreatePointerCast(Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd)
Create a BitCast, AddrSpaceCast, or PtrToInt cast instruction.
Definition: Instructions.cpp:3244
llvm::MemoryLocation::getForDest
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
Definition: MemoryLocation.cpp:108
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
LLVMContext.h
MemorySSAUpwardsStepLimit
static cl::opt< unsigned > MemorySSAUpwardsStepLimit("dse-memoryssa-walklimit", cl::init(90), cl::Hidden, cl::desc("The maximum number of steps while walking upwards to find " "MemoryDefs that may be killed (default = 90)"))
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:412
raw_ostream.h
llvm::NullPointerIsDefined
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an address space.
Definition: Function.cpp:1985
llvm::PHITransAddr::NeedsPHITranslationFromBlock
bool NeedsPHITranslationFromBlock(BasicBlock *BB) const
NeedsPHITranslationFromBlock - Return true if moving from the specified BasicBlock to its predecessors requires PHI translation.
Definition: PHITransAddr.h:63
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:388
llvm::PostDominatorTree::dominates
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
Definition: PostDominators.cpp:54
Value.h
InitializePasses.h
isShortenableAtTheEnd
static bool isShortenableAtTheEnd(Instruction *I)
Returns true if the end of this instruction can be safely shortened in length.
Definition: DeadStoreElimination.cpp:180
BuildLibCalls.h
llvm::offsetToAlignment
uint64_t offsetToAlignment(uint64_t Value, Align Alignment)
Returns the offset to the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alignment.
Definition: Alignment.h:196
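A worked example of the arithmetic, assuming only Support/Alignment.h.

#include "llvm/Support/Alignment.h"
#include <cassert>

void paddingExample() {
  // The next multiple of 8 at or above 13 is 16, so 3 bytes of padding are needed.
  assert(llvm::offsetToAlignment(13, llvm::Align(8)) == 3);
  // Values that are already aligned need no padding.
  assert(llvm::offsetToAlignment(16, llvm::Align(8)) == 0);
}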
llvm::mayContainIrreducibleControl
bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI)
Definition: MustExecute.cpp:482
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:440
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:209
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1246
llvm::isFreeCall
const CallInst * isFreeCall(const Value *I, const TargetLibraryInfo *TLI)
isFreeCall - Returns non-null if the value is a call to the builtin free()
Definition: MemoryBuiltins.cpp:452
SetVector.h
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38