1 //===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The code below implements dead store elimination using MemorySSA. It uses
10 // the following general approach: given a MemoryDef, walk upwards to find
11 // clobbering MemoryDefs that may be killed by the starting def. Then check
12 // that there are no uses that may read the location of the original MemoryDef
13 // in between both MemoryDefs. A bit more concretely:
14 //
15 // For all MemoryDefs StartDef:
16 // 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17 // upwards.
18 // 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19 // checking all uses starting at MaybeDeadAccess and walking until we see
20 // StartDef.
21 // 3. For each found CurrentDef, check that:
22 // 1. There are no barrier instructions between CurrentDef and StartDef (like
23 // throws or stores with ordering constraints).
24 // 2. StartDef is executed whenever CurrentDef is executed.
25 // 3. StartDef completely overwrites CurrentDef.
26 // 4. Erase CurrentDef from the function and MemorySSA.
27 //
28 //===----------------------------------------------------------------------===//
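//
// For illustration (a minimal hand-written example, not taken from the LLVM
// test suite): in the IR below the first store is a MemoryDef that is
// completely overwritten by the second store, with no intervening read, so
// the walk described above removes it.
//
//   define void @f(ptr %p) {
//     store i32 0, ptr %p   ; dead, killed by the next store
//     store i32 1, ptr %p
//     ret void
//   }
//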
29 
30 #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
31 #include "llvm/ADT/APInt.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/MapVector.h"
34 #include "llvm/ADT/PostOrderIterator.h"
35 #include "llvm/ADT/SetVector.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallVector.h"
38 #include "llvm/ADT/Statistic.h"
39 #include "llvm/ADT/StringRef.h"
40 #include "llvm/Analysis/AliasAnalysis.h"
41 #include "llvm/Analysis/AssumptionCache.h"
42 #include "llvm/Analysis/CaptureTracking.h"
43 #include "llvm/Analysis/CodeMetrics.h"
44 #include "llvm/Analysis/GlobalsModRef.h"
45 #include "llvm/Analysis/LoopInfo.h"
46 #include "llvm/Analysis/MemoryBuiltins.h"
47 #include "llvm/Analysis/MemoryLocation.h"
48 #include "llvm/Analysis/MemorySSA.h"
49 #include "llvm/Analysis/MemorySSAUpdater.h"
50 #include "llvm/Analysis/MustExecute.h"
51 #include "llvm/Analysis/PostDominators.h"
52 #include "llvm/Analysis/TargetLibraryInfo.h"
53 #include "llvm/Analysis/ValueTracking.h"
54 #include "llvm/IR/Argument.h"
55 #include "llvm/IR/BasicBlock.h"
56 #include "llvm/IR/Constant.h"
57 #include "llvm/IR/Constants.h"
58 #include "llvm/IR/DataLayout.h"
59 #include "llvm/IR/Dominators.h"
60 #include "llvm/IR/Function.h"
61 #include "llvm/IR/IRBuilder.h"
62 #include "llvm/IR/InstIterator.h"
63 #include "llvm/IR/InstrTypes.h"
64 #include "llvm/IR/Instruction.h"
65 #include "llvm/IR/Instructions.h"
66 #include "llvm/IR/IntrinsicInst.h"
67 #include "llvm/IR/Module.h"
68 #include "llvm/IR/PassManager.h"
69 #include "llvm/IR/PatternMatch.h"
70 #include "llvm/IR/Value.h"
71 #include "llvm/InitializePasses.h"
72 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/CommandLine.h"
75 #include "llvm/Support/Debug.h"
76 #include "llvm/Support/DebugCounter.h"
77 #include "llvm/Support/MathExtras.h"
78 #include "llvm/Support/raw_ostream.h"
79 #include "llvm/Transforms/Scalar.h"
80 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
81 #include "llvm/Transforms/Utils/BuildLibCalls.h"
82 #include "llvm/Transforms/Utils/Local.h"
83 #include <algorithm>
84 #include <cassert>
85 #include <cstdint>
86 #include <iterator>
87 #include <map>
88 #include <utility>
89 
90 using namespace llvm;
91 using namespace PatternMatch;
92 
93 #define DEBUG_TYPE "dse"
94 
95 STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
96 STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
97 STATISTIC(NumFastStores, "Number of stores deleted");
98 STATISTIC(NumFastOther, "Number of other instrs removed");
99 STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
100 STATISTIC(NumModifiedStores, "Number of stores modified");
101 STATISTIC(NumCFGChecks, "Number of stores modified");
102 STATISTIC(NumCFGTries, "Number of stores modified");
103 STATISTIC(NumCFGSuccess, "Number of stores modified");
104 STATISTIC(NumGetDomMemoryDefPassed,
105  "Number of times a valid candidate is returned from getDomMemoryDef");
106 STATISTIC(NumDomMemDefChecks,
107  "Number of iterations checking for reads in getDomMemoryDef");
108 
109 DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
110  "Controls which MemoryDefs are eliminated.");
111 
112 static cl::opt<bool>
113 EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
114  cl::init(true), cl::Hidden,
115  cl::desc("Enable partial-overwrite tracking in DSE"));
116 
117 static cl::opt<bool>
118 EnablePartialStoreMerging("enable-dse-partial-store-merging",
119  cl::init(true), cl::Hidden,
120  cl::desc("Enable partial store merging in DSE"));
121 
122 static cl::opt<unsigned>
123  MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
124  cl::desc("The number of memory instructions to scan for "
125  "dead store elimination (default = 150)"));
126 static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
127  "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
128  cl::desc("The maximum number of steps while walking upwards to find "
129  "MemoryDefs that may be killed (default = 90)"));
130 
131 static cl::opt<unsigned> MemorySSAPartialStoreLimit(
132  "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
133  cl::desc("The maximum number candidates that only partially overwrite the "
134  "killing MemoryDef to consider"
135  " (default = 5)"));
136 
137 static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
138  "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
139  cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
140  "other stores per basic block (default = 5000)"));
141 
142 static cl::opt<unsigned> MemorySSASameBBStepCost(
143  "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
144  cl::desc(
145  "The cost of a step in the same basic block as the killing MemoryDef"
146  "(default = 1)"));
147 
148 static cl::opt<unsigned>
149  MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
150  cl::Hidden,
151  cl::desc("The cost of a step in a different basic "
152  "block than the killing MemoryDef"
153  "(default = 5)"));
154 
155 static cl::opt<unsigned> MemorySSAPathCheckLimit(
156  "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
157  cl::desc("The maximum number of blocks to check when trying to prove that "
158  "all paths to an exit go through a killing block (default = 50)"));
159 
160 // This flag allows or disallows DSE to optimize MemorySSA during its
161 // traversal. Note that DSE optimizing MemorySSA may impact other passes
162 // downstream of the DSE invocation and can lead to issues not being
163 // reproducible in isolation (i.e. when MemorySSA is built from scratch). In
164 // those cases, the flag can be used to check if DSE's MemorySSA optimizations
165 // impact follow-up passes.
166 static cl::opt<bool>
167  OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
168  cl::desc("Allow DSE to optimize memory accesses."));
169 
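// As a usage sketch (assuming a typical new-pass-manager `opt` invocation;
// flag spellings are taken from the cl::opt declarations above), the knobs
// above can be toggled from the command line, e.g.:
//
//   opt -passes=dse -dse-memoryssa-scanlimit=300 \
//       -enable-dse-partial-store-merging=false -S input.ll
//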
170 //===----------------------------------------------------------------------===//
171 // Helper functions
172 //===----------------------------------------------------------------------===//
173 using OverlapIntervalsTy = std::map<int64_t, int64_t>;
174 using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
175 
176 /// Returns true if the end of this instruction can be safely shortened in
177 /// length.
178 static bool isShortenableAtTheEnd(Instruction *I) {
179  // Don't shorten stores for now
180  if (isa<StoreInst>(I))
181  return false;
182 
183  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
184  switch (II->getIntrinsicID()) {
185  default: return false;
186  case Intrinsic::memset:
187  case Intrinsic::memcpy:
188  case Intrinsic::memcpy_element_unordered_atomic:
189  case Intrinsic::memset_element_unordered_atomic:
190  // Do shorten memory intrinsics.
191  // FIXME: Add memmove if it's also safe to transform.
192  return true;
193  }
194  }
195 
196  // Don't shorten library calls for now.
197 
198  return false;
199 }
200 
201 /// Returns true if the beginning of this instruction can be safely shortened
202 /// in length.
203 static bool isShortenableAtTheBeginning(Instruction *I) {
204  // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
205  // easily done by offsetting the source address.
206  return isa<AnyMemSetInst>(I);
207 }
208 
209 static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
210  const TargetLibraryInfo &TLI,
211  const Function *F) {
212  uint64_t Size;
213  ObjectSizeOpts Opts;
214  Opts.NullIsUnknownSize = NullPointerIsDefined(F);
215 
216  if (getObjectSize(V, Size, DL, &TLI, Opts))
217  return Size;
218  return MemoryLocation::UnknownSize;
219 }
220 
221 namespace {
222 
223 enum OverwriteResult {
224  OW_Begin,
225  OW_Complete,
226  OW_End,
227  OW_PartialEarlierWithFullLater,
228  OW_MaybePartial,
229  OW_None,
230  OW_Unknown
231 };
232 
233 } // end anonymous namespace
234 
235 /// Check if two instructions are masked stores that completely
236 /// overwrite one another. More specifically, \p KillingI has to
237 /// overwrite \p DeadI.
238 static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
239  const Instruction *DeadI,
240  BatchAAResults &AA) {
241  const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
242  const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
243  if (KillingII == nullptr || DeadII == nullptr)
244  return OW_Unknown;
245  if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
246  return OW_Unknown;
247  if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
248  // Type size.
249  VectorType *KillingTy =
250  cast<VectorType>(KillingII->getArgOperand(0)->getType());
251  VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType());
252  if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
253  return OW_Unknown;
254  // Element count.
255  if (KillingTy->getElementCount() != DeadTy->getElementCount())
256  return OW_Unknown;
257  // Pointers.
258  Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
259  Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
260  if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
261  return OW_Unknown;
262  // Masks.
263  // TODO: check that KillingII's mask is a superset of the DeadII's mask.
264  if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
265  return OW_Unknown;
266  return OW_Complete;
267  }
268  return OW_Unknown;
269 }
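// Illustrative example (hand-written, not from a test case): the following
// pair of masked stores has matching element type, element count, pointer and
// mask operands, so the check above classifies them as OW_Complete:
//
//   call void @llvm.masked.store.v4i32.p0(<4 x i32> %a, ptr %p, i32 4,
//                                         <4 x i1> %mask)   ; dead
//   call void @llvm.masked.store.v4i32.p0(<4 x i32> %b, ptr %p, i32 4,
//                                         <4 x i1> %mask)   ; killing
//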
270 
271 /// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
272 /// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
273 /// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
274 /// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
275 /// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
276 /// overwritten by a killing (smaller) store which doesn't write outside the big
277 /// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
278 /// NOTE: This function must only be called if both \p KillingLoc and \p
279 /// DeadLoc belong to the same underlying object with valid \p KillingOff and
280 /// \p DeadOff.
281 static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
282  const MemoryLocation &DeadLoc,
283  int64_t KillingOff, int64_t DeadOff,
284  Instruction *DeadI,
285  InstOverlapIntervalsTy &IOL) {
286  const uint64_t KillingSize = KillingLoc.Size.getValue();
287  const uint64_t DeadSize = DeadLoc.Size.getValue();
288  // We may now overlap, although the overlap is not complete. There might also
289  // be other incomplete overlaps, and together, they might cover the complete
290  // dead store.
291  // Note: The correctness of this logic depends on the fact that this function
292  // is never called with DepWrite when there are any intervening reads.
293  if (EnablePartialOverwriteTracking &&
294      KillingOff < int64_t(DeadOff + DeadSize) &&
295  int64_t(KillingOff + KillingSize) >= DeadOff) {
296 
297  // Insert our part of the overlap into the map.
298  auto &IM = IOL[DeadI];
299  LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
300  << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
301  << KillingOff << ", " << int64_t(KillingOff + KillingSize)
302  << ")\n");
303 
304  // Make sure that we only insert non-overlapping intervals and combine
305  // adjacent intervals. The intervals are stored in the map with the ending
306  // offset as the key (in the half-open sense) and the starting offset as
307  // the value.
308  int64_t KillingIntStart = KillingOff;
309  int64_t KillingIntEnd = KillingOff + KillingSize;
310 
311  // Find any intervals ending at, or after, KillingIntStart which start
312  // before KillingIntEnd.
313  auto ILI = IM.lower_bound(KillingIntStart);
314  if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
315  // This existing interval is overlapped with the current store somewhere
316  // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
317  // intervals and adjusting our start and end.
318  KillingIntStart = std::min(KillingIntStart, ILI->second);
319  KillingIntEnd = std::max(KillingIntEnd, ILI->first);
320  ILI = IM.erase(ILI);
321 
322  // Continue erasing and adjusting our end in case other previous
323  // intervals are also overlapped with the current store.
324  //
325  // |--- dead 1 ---| |--- dead 2 ---|
326  // |------- killing---------|
327  //
328  while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
329  assert(ILI->second > KillingIntStart && "Unexpected interval");
330  KillingIntEnd = std::max(KillingIntEnd, ILI->first);
331  ILI = IM.erase(ILI);
332  }
333  }
334 
335  IM[KillingIntEnd] = KillingIntStart;
336 
337  ILI = IM.begin();
338  if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
339  LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
340  << DeadOff << ", " << int64_t(DeadOff + DeadSize)
341  << ") Composite KillingLoc [" << ILI->second << ", "
342  << ILI->first << ")\n");
343  ++NumCompletePartials;
344  return OW_Complete;
345  }
346  }
347 
348  // Check for a dead store which writes to all the memory locations that
349  // the killing store writes to.
350  if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
351  int64_t(DeadOff + DeadSize) > KillingOff &&
352  uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
353  LLVM_DEBUG(dbgs() << "DSE: Partial overwrite of a dead store [" << DeadOff
354  << ", " << int64_t(DeadOff + DeadSize)
355  << ") by a killing store [" << KillingOff << ", "
356  << int64_t(KillingOff + KillingSize) << ")\n");
357  // TODO: Maybe come up with a better name?
358  return OW_PartialEarlierWithFullLater;
359  }
360 
361  // Another interesting case is if the killing store overwrites the end of the
362  // dead store.
363  //
364  // |--dead--|
365  // |-- killing --|
366  //
367  // In this case we may want to trim the size of dead store to avoid
368  // generating stores to addresses which will definitely be overwritten by the
369  // killing store.
370  if (!EnablePartialOverwriteTracking &&
371      (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
372  int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
373  return OW_End;
374 
375  // Finally, we also need to check if the killing store overwrites the
376  // beginning of the dead store.
377  //
378  // |--dead--|
379  // |-- killing --|
380  //
381  // In this case we may want to move the destination address and trim the size
382  // of dead store to avoid generating stores to addresses which will definitely
383  // be overwritten by the killing store.
384  if (!EnablePartialOverwriteTracking &&
385      (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
386  assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
387  "Expect to be handled as OW_Complete");
388  return OW_Begin;
389  }
390  // Otherwise, they don't completely overlap.
391  return OW_Unknown;
392 }
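// Illustrative example (hand-written): with partial-overwrite tracking
// enabled, a dead store covering bytes [0, 8) can be killed by two later
// stores covering [0, 4) and [4, 8). Neither killing store overwrites the dead
// one on its own, but once both intervals are merged in the map the composite
// interval [0, 8) covers it and OW_Complete is returned:
//
//   store i64 0, ptr %p        ; dead, bytes [0, 8)
//   store i32 1, ptr %p        ; killing, bytes [0, 4)
//   %q = getelementptr i8, ptr %p, i64 4
//   store i32 2, ptr %q        ; killing, bytes [4, 8)
//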
393 
394 /// Returns true if the memory which is accessed by the second instruction is not
395 /// modified between the first and the second instruction.
396 /// Precondition: Second instruction must be dominated by the first
397 /// instruction.
398 static bool
399 memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
400  BatchAAResults &AA, const DataLayout &DL,
401  DominatorTree *DT) {
402  // Do a backwards scan through the CFG from SecondI to FirstI. Look for
403  // instructions which can modify the memory location accessed by SecondI.
404  //
405  // While doing the walk keep track of the address to check. It might be
406  // different in different basic blocks due to PHI translation.
407  using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
408  SmallVector<BlockAddressPair, 16> WorkList;
409  // Keep track of the address we visited each block with. Bail out if we
410  // visit a block with different addresses.
411  DenseMap<BasicBlock *, Value *> Visited;
412 
413  BasicBlock::iterator FirstBBI(FirstI);
414  ++FirstBBI;
415  BasicBlock::iterator SecondBBI(SecondI);
416  BasicBlock *FirstBB = FirstI->getParent();
417  BasicBlock *SecondBB = SecondI->getParent();
418  MemoryLocation MemLoc;
419  if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
420  MemLoc = MemoryLocation::getForDest(MemSet);
421  else
422  MemLoc = MemoryLocation::get(SecondI);
423 
424  auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
425 
426  // Start checking the SecondBB.
427  WorkList.push_back(
428  std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
429  bool isFirstBlock = true;
430 
431  // Check all blocks going backward until we reach the FirstBB.
432  while (!WorkList.empty()) {
433  BlockAddressPair Current = WorkList.pop_back_val();
434  BasicBlock *B = Current.first;
435  PHITransAddr &Addr = Current.second;
436  Value *Ptr = Addr.getAddr();
437 
438  // Ignore instructions before FirstI if this is the FirstBB.
439  BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
440 
441  BasicBlock::iterator EI;
442  if (isFirstBlock) {
443  // Ignore instructions after SecondI if this is the first visit of SecondBB.
444  assert(B == SecondBB && "first block is not the store block");
445  EI = SecondBBI;
446  isFirstBlock = false;
447  } else {
448  // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
449  // In this case we also have to look at instructions after SecondI.
450  EI = B->end();
451  }
452  for (; BI != EI; ++BI) {
453  Instruction *I = &*BI;
454  if (I->mayWriteToMemory() && I != SecondI)
455  if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
456  return false;
457  }
458  if (B != FirstBB) {
459  assert(B != &FirstBB->getParent()->getEntryBlock() &&
460  "Should not hit the entry block because SI must be dominated by LI");
461  for (BasicBlock *Pred : predecessors(B)) {
462  PHITransAddr PredAddr = Addr;
463  if (PredAddr.NeedsPHITranslationFromBlock(B)) {
464  if (!PredAddr.IsPotentiallyPHITranslatable())
465  return false;
466  if (PredAddr.PHITranslateValue(B, Pred, DT, false))
467  return false;
468  }
469  Value *TranslatedPtr = PredAddr.getAddr();
470  auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
471  if (!Inserted.second) {
472  // We already visited this block before. If it was with a different
473  // address - bail out!
474  if (TranslatedPtr != Inserted.first->second)
475  return false;
476  // ... otherwise just skip it.
477  continue;
478  }
479  WorkList.push_back(std::make_pair(Pred, PredAddr));
480  }
481  }
482  }
483  return true;
484 }
485 
486 static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
487  uint64_t &DeadSize, int64_t KillingStart,
488  uint64_t KillingSize, bool IsOverwriteEnd) {
489  auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
490  Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
491 
492  // We assume that memset/memcpy operates in chunks of the "largest" native
493  // type size and aligned on the same value. That means optimal start and size
494  // of memset/memcpy should be modulo of preferred alignment of that type. That
495  // is, there is no sense in trying to reduce the store size any further,
496  // since any "extra" stores come for free anyway.
497  // On the other hand, the maximum alignment we can achieve is limited by the
498  // alignment of the initial store.
499 
500  // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
501  // "largest" native type.
502  // Note: What is the proper way to get that value?
503  // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
504  // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
505 
506  int64_t ToRemoveStart = 0;
507  uint64_t ToRemoveSize = 0;
508  // Compute start and size of the region to remove. Make sure 'PrefAlign' is
509  // maintained on the remaining store.
510  if (IsOverwriteEnd) {
511  // Calculate required adjustment for 'KillingStart' in order to keep
512  // remaining store size aligned on 'PrefAlign'.
513  uint64_t Off =
514  offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
515  ToRemoveStart = KillingStart + Off;
516  if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
517  return false;
518  ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
519  } else {
520  ToRemoveStart = DeadStart;
521  assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
522  "Not overlapping accesses?");
523  ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
524  // Calculate required adjustment for 'ToRemoveSize' in order to keep
525  // start of the remaining store aligned on 'PrefAlign'.
526  uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
527  if (Off != 0) {
528  if (ToRemoveSize <= (PrefAlign.value() - Off))
529  return false;
530  ToRemoveSize -= PrefAlign.value() - Off;
531  }
532  assert(isAligned(PrefAlign, ToRemoveSize) &&
533  "Should preserve selected alignment");
534  }
535 
536  assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
537  assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
538 
539  uint64_t NewSize = DeadSize - ToRemoveSize;
540  if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
541  // When shortening an atomic memory intrinsic, the newly shortened
542  // length must remain an integer multiple of the element size.
543  const uint32_t ElementSize = AMI->getElementSizeInBytes();
544  if (0 != NewSize % ElementSize)
545  return false;
546  }
547 
548  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
549  << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
550  << "\n KILLER [" << ToRemoveStart << ", "
551  << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
552 
553  Value *DeadWriteLength = DeadIntrinsic->getLength();
554  Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
555  DeadIntrinsic->setLength(TrimmedLength);
556  DeadIntrinsic->setDestAlignment(PrefAlign);
557 
558  if (!IsOverwriteEnd) {
559  Value *OrigDest = DeadIntrinsic->getRawDest();
560  Type *Int8PtrTy =
561  Type::getInt8PtrTy(DeadIntrinsic->getContext(),
562  OrigDest->getType()->getPointerAddressSpace());
563  Value *Dest = OrigDest;
564  if (OrigDest->getType() != Int8PtrTy)
565  Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", DeadI);
566  Value *Indices[1] = {
567  ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
568  Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
569      Type::getInt8Ty(DeadIntrinsic->getContext()), Dest, Indices, "", DeadI);
570  NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
571  if (NewDestGEP->getType() != OrigDest->getType())
572  NewDestGEP = CastInst::CreatePointerCast(NewDestGEP, OrigDest->getType(),
573  "", DeadI);
574  DeadIntrinsic->setDest(NewDestGEP);
575  }
576 
577  // Finally update start and size of dead access.
578  if (!IsOverwriteEnd)
579  DeadStart += ToRemoveSize;
580  DeadSize = NewSize;
581 
582  return true;
583 }
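// Illustrative example (hand-written): if a memset of 32 bytes is later
// followed by stores that completely overwrite its last 16 bytes, tryToShorten
// rewrites the memset length so that only the still-live prefix is written
// (subject to the alignment rounding above):
//
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 32, i1 false)
//   ; ... bytes [16, 32) of %p are fully overwritten afterwards ...
//   ; => the memset length is trimmed from 32 to 16
//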
584 
585 static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
586  int64_t &DeadStart, uint64_t &DeadSize) {
587  if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
588  return false;
589 
590  OverlapIntervalsTy::iterator OII = --IntervalMap.end();
591  int64_t KillingStart = OII->second;
592  uint64_t KillingSize = OII->first - KillingStart;
593 
594  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
595 
596  if (KillingStart > DeadStart &&
597  // Note: "KillingStart - DeadStart" is known to be positive due to
598  // preceding check.
599  (uint64_t)(KillingStart - DeadStart) < DeadSize &&
600  // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
601  // be non negative due to preceding checks.
602  KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
603  if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
604  true)) {
605  IntervalMap.erase(OII);
606  return true;
607  }
608  }
609  return false;
610 }
611 
612 static bool tryToShortenBegin(Instruction *DeadI,
613  OverlapIntervalsTy &IntervalMap,
614  int64_t &DeadStart, uint64_t &DeadSize) {
615  if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
616  return false;
617 
618  OverlapIntervalsTy::iterator OII = IntervalMap.begin();
619  int64_t KillingStart = OII->second;
620  uint64_t KillingSize = OII->first - KillingStart;
621 
622  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
623 
624  if (KillingStart <= DeadStart &&
625  // Note: "DeadStart - KillingStart" is known to be non negative due to
626  // preceding check.
627  KillingSize > (uint64_t)(DeadStart - KillingStart)) {
628  // Note: "KillingSize - (uint64_t)(DeadStart - KillingStart)" is known to
629  // be positive due to preceding checks.
630  assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
631  "Should have been handled as OW_Complete");
632  if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
633  false)) {
634  IntervalMap.erase(OII);
635  return true;
636  }
637  }
638  return false;
639 }
640 
641 static Constant *
642 tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
643  int64_t KillingOffset, int64_t DeadOffset,
644  const DataLayout &DL, BatchAAResults &AA,
645  DominatorTree *DT) {
646 
647  if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
648  DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
649  KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
650  DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
651  memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
652  // If the store we find is:
653  // a) partially overwritten by the store to 'Loc'
654  // b) the killing store is fully contained in the dead one and
655  // c) they both have a constant value
656  // d) none of the two stores need padding
657  // Merge the two stores, replacing the dead store's value with a
658  // merge of both values.
659  // TODO: Deal with other constant types (vectors, etc), and probably
660  // some mem intrinsics (if needed)
661 
662  APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
663  APInt KillingValue =
664  cast<ConstantInt>(KillingI->getValueOperand())->getValue();
665  unsigned KillingBits = KillingValue.getBitWidth();
666  assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
667  KillingValue = KillingValue.zext(DeadValue.getBitWidth());
668 
669  // Offset of the smaller store inside the larger store
670  unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
671  unsigned LShiftAmount =
672  DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
673  : BitOffsetDiff;
674  APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
675  LShiftAmount + KillingBits);
676  // Clear the bits we'll be replacing, then OR with the smaller
677  // store, shifted appropriately.
678  APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
679  LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
680  << "\n Killing: " << *KillingI
681  << "\n Merged Value: " << Merged << '\n');
682  return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
683  }
684  return nullptr;
685 }
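// Illustrative example (hand-written): a wide constant store followed by a
// narrower constant store into the middle of it can be merged into a single
// constant, e.g. on a little-endian target:
//
//   store i32 0, ptr %p                     ; dead, fully contains the killer
//   %q = getelementptr i8, ptr %p, i64 1
//   store i8 1, ptr %q                      ; killing, at byte offset 1
//
// Here BitOffsetDiff is 8, so the merged value is 0x100; the dead store's
// operand is replaced with that constant and the narrow store then becomes
// removable.
//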
686 
687 namespace {
688 // Returns true if \p I is an intrisnic that does not read or write memory.
689 bool isNoopIntrinsic(Instruction *I) {
690  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
691  switch (II->getIntrinsicID()) {
692  case Intrinsic::lifetime_start:
693  case Intrinsic::lifetime_end:
694  case Intrinsic::invariant_end:
695  case Intrinsic::launder_invariant_group:
696  case Intrinsic::assume:
697  return true;
698  case Intrinsic::dbg_addr:
699  case Intrinsic::dbg_declare:
700  case Intrinsic::dbg_label:
701  case Intrinsic::dbg_value:
702  llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
703  default:
704  return false;
705  }
706  }
707  return false;
708 }
709 
710 // Check if we can ignore \p D for DSE.
711 bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
712  Instruction *DI = D->getMemoryInst();
713  // Calls that only access inaccessible memory cannot read or write any memory
714  // locations we consider for elimination.
715  if (auto *CB = dyn_cast<CallBase>(DI))
716  if (CB->onlyAccessesInaccessibleMemory())
717  return true;
718 
719  // We can eliminate stores to locations not visible to the caller across
720  // throwing instructions.
721  if (DI->mayThrow() && !DefVisibleToCaller)
722  return true;
723 
724  // We can remove the dead stores, irrespective of the fence and its ordering
725  // (release/acquire/seq_cst). Fences only constrain the ordering of
726  // already visible stores; they do not make a store visible to other
727  // threads. So, skipping over a fence does not change a store from being
728  // dead.
729  if (isa<FenceInst>(DI))
730  return true;
731 
732  // Skip intrinsics that do not really read or modify memory.
733  if (isNoopIntrinsic(DI))
734  return true;
735 
736  return false;
737 }
738 
739 struct DSEState {
740  Function &F;
741  AliasAnalysis &AA;
742  EarliestEscapeInfo EI;
743 
744  /// The single BatchAA instance that is used to cache AA queries. It will
745  /// not be invalidated over the whole run. This is safe, because:
746  /// 1. Only memory writes are removed, so the alias cache for memory
747  /// locations remains valid.
748  /// 2. No new instructions are added (only instructions removed), so cached
749  /// information for a deleted value cannot be accessed by a re-used new
750  /// value pointer.
751  BatchAAResults BatchAA;
752 
753  MemorySSA &MSSA;
754  DominatorTree &DT;
755  PostDominatorTree &PDT;
756  const TargetLibraryInfo &TLI;
757  const DataLayout &DL;
758  const LoopInfo &LI;
759 
760  // Whether the function contains any irreducible control flow, useful for
761  // being accurately able to detect loops.
762  bool ContainsIrreducibleLoops;
763 
764  // All MemoryDefs that potentially could kill other MemDefs.
765  SmallVector<MemoryDef *, 64> MemDefs;
766  // Any that should be skipped as they are already deleted
767  SmallPtrSet<MemoryAccess *, 4> SkipStores;
768  // Keep track whether a given object is captured before return or not.
769  DenseMap<const Value *, bool> CapturedBeforeReturn;
770  // Keep track of all of the objects that are invisible to the caller after
771  // the function returns.
772  DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
773  // Keep track of blocks with throwing instructions not modeled in MemorySSA.
774  SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
775  // Post-order numbers for each basic block. Used to figure out if memory
776  // accesses are executed before another access.
777  DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
778  // Values that are only used with assumes. Used to refine pointer escape
779  // analysis.
780  SmallPtrSet<const Value *, 16> EphValues;
781 
782  /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
783  /// basic block.
784  MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
785  // Check if there are root nodes that are terminated by UnreachableInst.
786  // Those roots pessimize post-dominance queries. If there are such roots,
787  // fall back to CFG scan starting from all non-unreachable roots.
788  bool AnyUnreachableExit;
789 
790  // Whether or not we should iterate on removing dead stores at the end of the
791  // function due to removing a store causing a previously captured pointer to
792  // no longer be captured.
793  bool ShouldIterateEndOfFunctionDSE;
794 
795  // Class contains self-reference, make sure it's not copied/moved.
796  DSEState(const DSEState &) = delete;
797  DSEState &operator=(const DSEState &) = delete;
798 
799  DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
800  PostDominatorTree &PDT, AssumptionCache &AC,
801  const TargetLibraryInfo &TLI, const LoopInfo &LI)
802  : F(F), AA(AA), EI(DT, LI, EphValues), BatchAA(AA, &EI), MSSA(MSSA),
803  DT(DT), PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
804  // Collect blocks with throwing instructions not modeled in MemorySSA and
805  // alloc-like objects.
806  unsigned PO = 0;
807  for (BasicBlock *BB : post_order(&F)) {
808  PostOrderNumbers[BB] = PO++;
809  for (Instruction &I : *BB) {
810  MemoryAccess *MA = MSSA.getMemoryAccess(&I);
811  if (I.mayThrow() && !MA)
812  ThrowingBlocks.insert(I.getParent());
813 
814  auto *MD = dyn_cast_or_null<MemoryDef>(MA);
815  if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
816  (getLocForWrite(&I) || isMemTerminatorInst(&I)))
817  MemDefs.push_back(MD);
818  }
819  }
820 
821  // Treat byval or inalloca arguments the same as Allocas, stores to them are
822  // dead at the end of the function.
823  for (Argument &AI : F.args())
824  if (AI.hasPassPointeeByValueCopyAttr())
825  InvisibleToCallerAfterRet.insert({&AI, true});
826 
827  // Collect whether there is any irreducible control flow in the function.
828  ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
829 
830  AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
831  return isa<UnreachableInst>(E->getTerminator());
832  });
833 
834  CodeMetrics::collectEphemeralValues(&F, &AC, EphValues);
835  }
836 
837  /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
838  /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
839  /// location (by \p DeadI instruction).
840  /// Return OW_MaybePartial if \p KillingI does not completely overwrite
841  /// \p DeadI, but they both write to the same underlying object. In that
842  /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
843  /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite the
844  /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
845  OverwriteResult isOverwrite(const Instruction *KillingI,
846  const Instruction *DeadI,
847  const MemoryLocation &KillingLoc,
848  const MemoryLocation &DeadLoc,
849  int64_t &KillingOff, int64_t &DeadOff) {
850  // AliasAnalysis does not always account for loops. Limit overwrite checks
851  // to dependencies for which we can guarantee they are independent of any
852  // loops they are in.
853  if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
854  return OW_Unknown;
855 
856  const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
857  const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
858  const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
859  const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
860 
861  // Check whether the killing store overwrites the whole object, in which
862  // case the size/offset of the dead store does not matter.
863  if (DeadUndObj == KillingUndObj && KillingLoc.Size.isPrecise()) {
864  uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
865  if (KillingUndObjSize != MemoryLocation::UnknownSize &&
866  KillingUndObjSize == KillingLoc.Size.getValue())
867  return OW_Complete;
868  }
869 
870  // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
871  // get imprecise values here, though (except for unknown sizes).
872  if (!KillingLoc.Size.isPrecise() || !DeadLoc.Size.isPrecise()) {
873  // In case no constant size is known, try to use IR values for the number
874  // of bytes written and check if they match.
875  const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
876  const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
877  if (KillingMemI && DeadMemI) {
878  const Value *KillingV = KillingMemI->getLength();
879  const Value *DeadV = DeadMemI->getLength();
880  if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
881  return OW_Complete;
882  }
883 
884  // Masked stores have imprecise locations, but we can reason about them
885  // to some extent.
886  return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
887  }
888 
889  const uint64_t KillingSize = KillingLoc.Size.getValue();
890  const uint64_t DeadSize = DeadLoc.Size.getValue();
891 
892  // Query the alias information
893  AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
894 
895  // If the start pointers are the same, we just have to compare sizes to see if
896  // the killing store was larger than the dead store.
897  if (AAR == AliasResult::MustAlias) {
898  // Make sure that the KillingSize size is >= the DeadSize size.
899  if (KillingSize >= DeadSize)
900  return OW_Complete;
901  }
902 
903  // If we hit a partial alias we may have a full overwrite
904  if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
905  int32_t Off = AAR.getOffset();
906  if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
907  return OW_Complete;
908  }
909 
910  // If we can't resolve the same pointers to the same object, then we can't
911  // analyze them at all.
912  if (DeadUndObj != KillingUndObj) {
913  // Non aliasing stores to different objects don't overlap. Note that
914  // if the killing store is known to overwrite whole object (out of
915  // bounds access overwrites whole object as well) then it is assumed to
916  // completely overwrite any store to the same object even if they don't
917  // actually alias (see next check).
918  if (AAR == AliasResult::NoAlias)
919  return OW_None;
920  return OW_Unknown;
921  }
922 
923  // Okay, we have stores to two completely different pointers. Try to
924  // decompose the pointer into a "base + constant_offset" form. If the base
925  // pointers are equal, then we can reason about the two stores.
926  DeadOff = 0;
927  KillingOff = 0;
928  const Value *DeadBasePtr =
929  GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
930  const Value *KillingBasePtr =
931  GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
932 
933  // If the base pointers still differ, we have two completely different
934  // stores.
935  if (DeadBasePtr != KillingBasePtr)
936  return OW_Unknown;
937 
938  // The killing access completely overlaps the dead store if and only if
939  // both start and end of the dead one is "inside" the killing one:
940  // |<->|--dead--|<->|
941  // |-----killing------|
942  // Accesses may overlap if and only if start of one of them is "inside"
943  // another one:
944  // |<->|--dead--|<-------->|
945  // |-------killing--------|
946  // OR
947  // |-------dead-------|
948  // |<->|---killing---|<----->|
949  //
950  // We have to be careful here as *Off is signed while *.Size is unsigned.
951 
952  // Check if the dead access starts "not before" the killing one.
953  if (DeadOff >= KillingOff) {
954  // If the dead access ends "not after" the killing access then the
955  // dead one is completely overwritten by the killing one.
956  if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
957  return OW_Complete;
958  // If start of the dead access is "before" end of the killing access
959  // then accesses overlap.
960  else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
961  return OW_MaybePartial;
962  }
963  // If start of the killing access is "before" end of the dead access then
964  // accesses overlap.
965  else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
966  return OW_MaybePartial;
967  }
968 
969  // Can reach here only if accesses are known not to overlap.
970  return OW_None;
971  }
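// Illustrative example (hand-written): for stores to the same base pointer,
// the offset arithmetic above classifies
//
//   %q = getelementptr i8, ptr %p, i64 2
//   store i16 0, ptr %q    ; dead, bytes [2, 4) of %p
//   store i32 1, ptr %p    ; killing, bytes [0, 4) of %p
//
// as OW_Complete (DeadOff = 2 >= KillingOff = 0 and 2 + 2 <= 4), while a dead
// store covering bytes [2, 6) against the same killer would only be
// OW_MaybePartial.
//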
972 
973  bool isInvisibleToCallerAfterRet(const Value *V) {
974  if (isa<AllocaInst>(V))
975  return true;
976  auto I = InvisibleToCallerAfterRet.insert({V, false});
977  if (I.second) {
978  if (!isInvisibleToCallerOnUnwind(V)) {
979  I.first->second = false;
980  } else if (isNoAliasCall(V)) {
981  I.first->second = !PointerMayBeCaptured(V, true, false, EphValues);
982  }
983  }
984  return I.first->second;
985  }
986 
987  bool isInvisibleToCallerOnUnwind(const Value *V) {
988  bool RequiresNoCaptureBeforeUnwind;
989  if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
990  return false;
991  if (!RequiresNoCaptureBeforeUnwind)
992  return true;
993 
994  auto I = CapturedBeforeReturn.insert({V, true});
995  if (I.second)
996  // NOTE: This could be made more precise by PointerMayBeCapturedBefore
997  // with the killing MemoryDef. But we refrain from doing so for now to
998  // limit compile-time and this does not cause any changes to the number
999  // of stores removed on a large test set in practice.
1000  I.first->second = PointerMayBeCaptured(V, false, true, EphValues);
1001  return !I.first->second;
1002  }
1003 
1004  Optional<MemoryLocation> getLocForWrite(Instruction *I) const {
1005  if (!I->mayWriteToMemory())
1006  return None;
1007 
1008  if (auto *CB = dyn_cast<CallBase>(I))
1009  return MemoryLocation::getForDest(CB, TLI);
1010 
1011  return MemoryLocation::getOrNone(I);
1012  }
1013 
1014  /// Assuming this instruction has a dead analyzable write, can we delete
1015  /// this instruction?
1016  bool isRemovable(Instruction *I) {
1017  assert(getLocForWrite(I) && "Must have analyzable write");
1018 
1019  // Don't remove volatile/atomic stores.
1020  if (StoreInst *SI = dyn_cast<StoreInst>(I))
1021  return SI->isUnordered();
1022 
1023  if (auto *CB = dyn_cast<CallBase>(I)) {
1024  // Don't remove volatile memory intrinsics.
1025  if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1026  return !MI->isVolatile();
1027 
1028  // Never remove dead lifetime intrinsics, e.g. because they are followed
1029  // by a free.
1030  if (CB->isLifetimeStartOrEnd())
1031  return false;
1032 
1033  return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1034  !CB->isTerminator();
1035  }
1036 
1037  return false;
1038  }
1039 
1040  /// Returns true if \p UseInst completely overwrites \p DefLoc
1041  /// (stored by \p DefInst).
1042  bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1043  Instruction *UseInst) {
1044  // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1045  // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1046  // MemoryDef.
1047  if (!UseInst->mayWriteToMemory())
1048  return false;
1049 
1050  if (auto *CB = dyn_cast<CallBase>(UseInst))
1051  if (CB->onlyAccessesInaccessibleMemory())
1052  return false;
1053 
1054  int64_t InstWriteOffset, DepWriteOffset;
1055  if (auto CC = getLocForWrite(UseInst))
1056  return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1057  DepWriteOffset) == OW_Complete;
1058  return false;
1059  }
1060 
1061  /// Returns true if \p Def is not read before returning from the function.
1062  bool isWriteAtEndOfFunction(MemoryDef *Def) {
1063  LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1064  << *Def->getMemoryInst()
1065  << ") is at the end of the function\n");
1066 
1067  auto MaybeLoc = getLocForWrite(Def->getMemoryInst());
1068  if (!MaybeLoc) {
1069  LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
1070  return false;
1071  }
1072 
1073  SmallVector<MemoryAccess *, 4> WorkList;
1074  SmallPtrSet<MemoryAccess *, 8> Visited;
1075  auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
1076  if (!Visited.insert(Acc).second)
1077  return;
1078  for (Use &U : Acc->uses())
1079  WorkList.push_back(cast<MemoryAccess>(U.getUser()));
1080  };
1081  PushMemUses(Def);
1082  for (unsigned I = 0; I < WorkList.size(); I++) {
1083  if (WorkList.size() >= MemorySSAScanLimit) {
1084  LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1085  return false;
1086  }
1087 
1088  MemoryAccess *UseAccess = WorkList[I];
1089  if (isa<MemoryPhi>(UseAccess)) {
1090  // AliasAnalysis does not account for loops. Limit elimination to
1091  // candidates for which we can guarantee they always store to the same
1092  // memory location.
1093  if (!isGuaranteedLoopInvariant(MaybeLoc->Ptr))
1094  return false;
1095 
1096  PushMemUses(cast<MemoryPhi>(UseAccess));
1097  continue;
1098  }
1099  // TODO: Checking for aliasing is expensive. Consider reducing the amount
1100  // of times this is called and/or caching it.
1101  Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1102  if (isReadClobber(*MaybeLoc, UseInst)) {
1103  LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1104  return false;
1105  }
1106 
1107  if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1108  PushMemUses(UseDef);
1109  }
1110  return true;
1111  }
1112 
1113  /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1114  /// pair with the MemoryLocation terminated by \p I and a boolean flag
1115  /// indicating whether \p I is a free-like call.
1116  Optional<std::pair<MemoryLocation, bool>>
1117  getLocForTerminator(Instruction *I) const {
1118  uint64_t Len;
1119  Value *Ptr;
1120  if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
1121  m_Value(Ptr))))
1122  return {std::make_pair(MemoryLocation(Ptr, Len), false)};
1123 
1124  if (auto *CB = dyn_cast<CallBase>(I)) {
1125  if (Value *FreedOp = getFreedOperand(CB, &TLI))
1126  return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
1127  }
1128 
1129  return None;
1130  }
1131 
1132  /// Returns true if \p I is a memory terminator instruction like
1133  /// llvm.lifetime.end or free.
1134  bool isMemTerminatorInst(Instruction *I) const {
1135  auto *CB = dyn_cast<CallBase>(I);
1136  return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1137  getFreedOperand(CB, &TLI) != nullptr);
1138  }
1139 
1140  /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1141  /// instruction \p AccessI.
1142  bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1143  Instruction *MaybeTerm) {
1144  Optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1145      getLocForTerminator(MaybeTerm);
1146 
1147  if (!MaybeTermLoc)
1148  return false;
1149 
1150  // If the terminator is a free-like call, all accesses to the underlying
1151  // object can be considered terminated.
1152  if (getUnderlyingObject(Loc.Ptr) !=
1153  getUnderlyingObject(MaybeTermLoc->first.Ptr))
1154  return false;
1155 
1156  auto TermLoc = MaybeTermLoc->first;
1157  if (MaybeTermLoc->second) {
1158  const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1159  return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1160  }
1161  int64_t InstWriteOffset = 0;
1162  int64_t DepWriteOffset = 0;
1163  return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1164  DepWriteOffset) == OW_Complete;
1165  }
1166 
1167  // Returns true if \p Use may read from \p DefLoc.
1168  bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1169  if (isNoopIntrinsic(UseInst))
1170  return false;
1171 
1172  // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1173  // treated as read clobber.
1174  if (auto SI = dyn_cast<StoreInst>(UseInst))
1175  return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1176 
1177  if (!UseInst->mayReadFromMemory())
1178  return false;
1179 
1180  if (auto *CB = dyn_cast<CallBase>(UseInst))
1181  if (CB->onlyAccessesInaccessibleMemory())
1182  return false;
1183 
1184  return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1185  }
1186 
1187  /// Returns true if a dependency between \p Current and \p KillingDef is
1188  /// guaranteed to be loop invariant for the loops that they are in. Either
1189  /// because they are known to be in the same block, in the same loop level or
1190  /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1191  /// during execution of the containing function.
1192  bool isGuaranteedLoopIndependent(const Instruction *Current,
1193  const Instruction *KillingDef,
1194  const MemoryLocation &CurrentLoc) {
1195  // If the dependency is within the same block or loop level (being careful
1196  // of irreducible loops), we know that AA will return a valid result for the
1197  // memory dependency. (Both at the function level, outside of any loop,
1198  // would also be valid but we currently disable that to limit compile time).
1199  if (Current->getParent() == KillingDef->getParent())
1200  return true;
1201  const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
1202  if (!ContainsIrreducibleLoops && CurrentLI &&
1203  CurrentLI == LI.getLoopFor(KillingDef->getParent()))
1204  return true;
1205  // Otherwise check the memory location is invariant to any loops.
1206  return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1207  }
1208 
1209  /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1210  /// loop. In particular, this guarantees that it only references a single
1211  /// MemoryLocation during execution of the containing function.
1212  bool isGuaranteedLoopInvariant(const Value *Ptr) {
1213  Ptr = Ptr->stripPointerCasts();
1214  if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1215  if (GEP->hasAllConstantIndices())
1216  Ptr = GEP->getPointerOperand()->stripPointerCasts();
1217 
1218  if (auto *I = dyn_cast<Instruction>(Ptr))
1219  return I->getParent()->isEntryBlock();
1220  return true;
1221  }
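// Illustrative example (hand-written): a pointer like
//
//   %a = alloca [4 x i32]                                 ; in the entry block
//   %p = getelementptr [4 x i32], ptr %a, i64 0, i64 1
//
// has all-constant GEP indices on an entry-block base and is treated as loop
// invariant, whereas a GEP whose index is a loop-carried induction variable is
// not, since it may name a different location on every iteration.
//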
1222 
1223  // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1224  // with no read access between them or on any other path to a function exit
1225  // block if \p KillingLoc is not accessible after the function returns. If
1226  // there is no such MemoryDef, return None. The returned value may not
1227  // (completely) overwrite \p KillingLoc. Currently we bail out when we
1228  // encounter an aliasing MemoryUse (read).
1229  Optional<MemoryAccess *>
1230  getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1231  const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1232  unsigned &ScanLimit, unsigned &WalkerStepLimit,
1233  bool IsMemTerm, unsigned &PartialLimit) {
1234  if (ScanLimit == 0 || WalkerStepLimit == 0) {
1235  LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1236  return None;
1237  }
1238 
1239  MemoryAccess *Current = StartAccess;
1240  Instruction *KillingI = KillingDef->getMemoryInst();
1241  LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1242 
1243  // Only optimize defining access of KillingDef when directly starting at its
1244  // defining access. The defining access also must only access KillingLoc. At
1245  // the moment we only support instructions with a single write location, so
1246  // it should be sufficient to disable optimizations for instructions that
1247  // also read from memory.
1248  bool CanOptimize = OptimizeMemorySSA &&
1249  KillingDef->getDefiningAccess() == StartAccess &&
1250  !KillingI->mayReadFromMemory();
1251 
1252  // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1253  Optional<MemoryLocation> CurrentLoc;
1254  for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1255  LLVM_DEBUG({
1256  dbgs() << " visiting " << *Current;
1257  if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1258  dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1259  << ")";
1260  dbgs() << "\n";
1261  });
1262 
1263  // Reached TOP.
1264  if (MSSA.isLiveOnEntryDef(Current)) {
1265  LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1266  if (CanOptimize && Current != KillingDef->getDefiningAccess())
1267  // The first clobbering def is... none.
1268  KillingDef->setOptimized(Current);
1269  return None;
1270  }
1271 
1272  // Cost of a step. Accesses in the same block are more likely to be valid
1273  // candidates for elimination, hence consider them cheaper.
1274  unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1275                              ? MemorySSASameBBStepCost
1276                              : MemorySSAOtherBBStepCost;
1277      if (WalkerStepLimit <= StepCost) {
1278  LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1279  return None;
1280  }
1281  WalkerStepLimit -= StepCost;
1282 
1283  // Return for MemoryPhis. They cannot be eliminated directly and the
1284  // caller is responsible for traversing them.
1285  if (isa<MemoryPhi>(Current)) {
1286  LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1287  return Current;
1288  }
1289 
1290  // Below, check if CurrentDef is a valid candidate to be eliminated by
1291  // KillingDef. If it is not, check the next candidate.
1292  MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1293  Instruction *CurrentI = CurrentDef->getMemoryInst();
1294 
1295  if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
1296  CanOptimize = false;
1297  continue;
1298  }
1299 
1300  // Before we try to remove anything, check for any extra throwing
1301  // instructions that block us from DSEing
1302  if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1303  LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1304  return None;
1305  }
1306 
1307  // Check for anything that looks like it will be a barrier to further
1308  // removal
1309  if (isDSEBarrier(KillingUndObj, CurrentI)) {
1310  LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1311  return None;
1312  }
1313 
1314  // If Current is known to be on path that reads DefLoc or is a read
1315  // clobber, bail out, as the path is not profitable. We skip this check
1316  // for intrinsic calls, because the code knows how to handle memcpy
1317  // intrinsics.
1318  if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1319  return None;
1320 
1321  // Quick check if there are direct uses that are read-clobbers.
1322  if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1323  if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1324  return !MSSA.dominates(StartAccess, UseOrDef) &&
1325  isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1326  return false;
1327  })) {
1328  LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1329  return None;
1330  }
1331 
1332  // If Current does not have an analyzable write location or is not
1333  // removable, skip it.
1334  CurrentLoc = getLocForWrite(CurrentI);
1335  if (!CurrentLoc || !isRemovable(CurrentI)) {
1336  CanOptimize = false;
1337  continue;
1338  }
1339 
1340  // AliasAnalysis does not account for loops. Limit elimination to
1341  // candidates for which we can guarantee they always store to the same
1342  // memory location and not located in different loops.
1343  if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1344  LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1345  CanOptimize = false;
1346  continue;
1347  }
1348 
1349  if (IsMemTerm) {
1350  // If the killing def is a memory terminator (e.g. lifetime.end), check
1351  // the next candidate if the current Current does not write the same
1352  // underlying object as the terminator.
1353  if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1354  CanOptimize = false;
1355  continue;
1356  }
1357  } else {
1358  int64_t KillingOffset = 0;
1359  int64_t DeadOffset = 0;
1360  auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1361  KillingOffset, DeadOffset);
1362  if (CanOptimize) {
1363  // CurrentDef is the earliest write clobber of KillingDef. Use it as
1364  // optimized access. Do not optimize if CurrentDef is already the
1365  // defining access of KillingDef.
1366  if (CurrentDef != KillingDef->getDefiningAccess() &&
1367  (OR == OW_Complete || OR == OW_MaybePartial))
1368  KillingDef->setOptimized(CurrentDef);
1369 
1370  // Once a may-aliasing def is encountered do not set an optimized
1371  // access.
1372  if (OR != OW_None)
1373  CanOptimize = false;
1374  }
1375 
1376  // If Current does not write to the same object as KillingDef, check
1377  // the next candidate.
1378  if (OR == OW_Unknown || OR == OW_None)
1379  continue;
1380  else if (OR == OW_MaybePartial) {
1381  // If KillingDef only partially overwrites Current, check the next
1382  // candidate if the partial step limit is exceeded. This aggressively
1383  // limits the number of candidates for partial store elimination,
1384  // which are less likely to be removable in the end.
1385  if (PartialLimit <= 1) {
1386  WalkerStepLimit -= 1;
1387  LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
1388  continue;
1389  }
1390  PartialLimit -= 1;
1391  }
1392  }
1393  break;
1394  };
1395 
1396  // Accesses to objects accessible after the function returns can only be
1397  // eliminated if the access is dead along all paths to the exit. Collect
1398  // the blocks with killing (=completely overwriting MemoryDefs) and check if
1399  // they cover all paths from MaybeDeadAccess to any function exit.
1400  SmallPtrSet<Instruction *, 16> KillingDefs;
1401  KillingDefs.insert(KillingDef->getMemoryInst());
1402  MemoryAccess *MaybeDeadAccess = Current;
1403  MemoryLocation MaybeDeadLoc = *CurrentLoc;
1404  Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1405  LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1406  << *MaybeDeadI << ")\n");
1407 
1408  SmallSetVector<MemoryAccess *, 32> WorkList;
1409  auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
1410  for (Use &U : Acc->uses())
1411  WorkList.insert(cast<MemoryAccess>(U.getUser()));
1412  };
1413  PushMemUses(MaybeDeadAccess);
1414 
1415  // Check if DeadDef may be read.
1416  for (unsigned I = 0; I < WorkList.size(); I++) {
1417  MemoryAccess *UseAccess = WorkList[I];
1418 
1419  LLVM_DEBUG(dbgs() << " " << *UseAccess);
1420  // Bail out if the number of accesses to check exceeds the scan limit.
1421  if (ScanLimit < (WorkList.size() - I)) {
1422  LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1423  return None;
1424  }
1425  --ScanLimit;
1426  NumDomMemDefChecks++;
1427 
1428  if (isa<MemoryPhi>(UseAccess)) {
1429  if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1430  return DT.properlyDominates(KI->getParent(),
1431  UseAccess->getBlock());
1432  })) {
1433  LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1434  continue;
1435  }
1436  LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1437  PushMemUses(UseAccess);
1438  continue;
1439  }
1440 
1441  Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1442  LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1443 
1444  if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1445  return DT.dominates(KI, UseInst);
1446  })) {
1447  LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1448  continue;
1449  }
1450 
1451  // A memory terminator kills all preceding MemoryDefs and all succeeding
1452  // MemoryAccesses. We do not have to check its users.
1453  if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1454  LLVM_DEBUG(
1455  dbgs()
1456  << " ... skipping, memterminator invalidates following accesses\n");
1457  continue;
1458  }
1459 
1460  if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1461  LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1462  PushMemUses(UseAccess);
1463  continue;
1464  }
1465 
1466  if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
1467  LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1468  return None;
1469  }
1470 
1471  // Uses which may read the original MemoryDef mean we cannot eliminate the
1472  // original MD. Stop walk.
1473  if (isReadClobber(MaybeDeadLoc, UseInst)) {
1474  LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1475  return None;
1476  }
1477 
1478  // If this worklist walks back to the original memory access (and the
1479  // pointer is not guaranteed loop invariant) then we cannot assume that a
1480  // store kills itself.
1481  if (MaybeDeadAccess == UseAccess &&
1482  !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1483  LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1484  return None;
1485  }
1486  // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1487  // if it reads the memory location.
1488  // TODO: It would probably be better to check for self-reads before
1489  // calling the function.
1490  if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1491  LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1492  continue;
1493  }
1494 
1495  // Check all uses for MemoryDefs, except for defs completely overwriting
1496  // the original location. Otherwise we have to check uses of *all*
1497  // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
1498  // miss cases like the following
1499  // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1500  // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1501  // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1502  // (The Use points to the *first* Def it may alias)
1503  // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1504  // stores [0,1]
1505  if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1506  if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1507  BasicBlock *MaybeKillingBlock = UseInst->getParent();
1508  if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1509  PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1510  if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1511  LLVM_DEBUG(dbgs()
1512  << " ... found killing def " << *UseInst << "\n");
1513  KillingDefs.insert(UseInst);
1514  }
1515  } else {
1516  LLVM_DEBUG(dbgs()
1517  << " ... found preceding def " << *UseInst << "\n");
1518  return None;
1519  }
1520  } else
1521  PushMemUses(UseDef);
1522  }
1523  }
1524 
1525  // For accesses to locations visible after the function returns, make sure
1526  // that the location is dead (=overwritten) along all paths from
1527  // MaybeDeadAccess to the exit.
1528  if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1529  SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1530  for (Instruction *KD : KillingDefs)
1531  KillingBlocks.insert(KD->getParent());
1532  assert(!KillingBlocks.empty() &&
1533  "Expected at least a single killing block");
1534 
1535  // Find the common post-dominator of all killing blocks.
1536  BasicBlock *CommonPred = *KillingBlocks.begin();
1537  for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1538  if (!CommonPred)
1539  break;
1540  CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1541  }
1542 
1543  // If the common post-dominator does not post-dominate MaybeDeadAccess,
1544  // there is a path from MaybeDeadAccess to an exit not going through a
1545  // killing block.
1546  if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1547  if (!AnyUnreachableExit)
1548  return None;
1549 
1550  // Fall back to CFG scan starting at all non-unreachable roots if not
1551  // all paths to the exit go through CommonPred.
1552  CommonPred = nullptr;
1553  }
1554 
1555  // If CommonPred itself is in the set of killing blocks, we're done.
1556  if (KillingBlocks.count(CommonPred))
1557  return {MaybeDeadAccess};
1558 
1559  SetVector<BasicBlock *> WorkList;
1560  // If CommonPred is null, there are multiple exits from the function.
1561  // They all have to be added to the worklist.
1562  if (CommonPred)
1563  WorkList.insert(CommonPred);
1564  else
1565  for (BasicBlock *R : PDT.roots()) {
1566  if (!isa<UnreachableInst>(R->getTerminator()))
1567  WorkList.insert(R);
1568  }
1569 
1570  NumCFGTries++;
1571  // Check if all paths starting from an exit node go through one of the
1572  // killing blocks before reaching MaybeDeadAccess.
1573  for (unsigned I = 0; I < WorkList.size(); I++) {
1574  NumCFGChecks++;
1575  BasicBlock *Current = WorkList[I];
1576  if (KillingBlocks.count(Current))
1577  continue;
1578  if (Current == MaybeDeadAccess->getBlock())
1579  return None;
1580 
1581  // MaybeDeadAccess is reachable from the entry, so we don't have to
1582  // explore unreachable blocks further.
1583  if (!DT.isReachableFromEntry(Current))
1584  continue;
1585 
1586  for (BasicBlock *Pred : predecessors(Current))
1587  WorkList.insert(Pred);
1588 
1589  if (WorkList.size() >= MemorySSAPathCheckLimit)
1590  return None;
1591  }
1592  NumCFGSuccess++;
1593  }
1594 
1595  // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1596  // potentially dead.
1597  return {MaybeDeadAccess};
1598  }
1599 
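// Illustrative sketch (not part of this file): a minimal IR case in which the
// walk above would return the earlier store as a dead candidate. The function
// name @f and the pointer %p are invented for the example.
//
//   define void @f(ptr %p) {
//     store i32 1, ptr %p    ; MaybeDeadAccess: completely overwritten below
//     store i32 2, ptr %p    ; KillingDef: no read of %p in between
//     ret void
//   }
//
// Starting at KillingDef's defining access, the first store is found as a
// candidate, and the use scan confirms nothing reads %p before the killing
// store, so the first store can be erased.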
1600  // Delete dead memory defs
1601  void deleteDeadInstruction(Instruction *SI) {
1602  MemorySSAUpdater Updater(&MSSA);
1603  SmallVector<Instruction *, 32> NowDeadInsts;
1604  NowDeadInsts.push_back(SI);
1605  --NumFastOther;
1606 
1607  while (!NowDeadInsts.empty()) {
1608  Instruction *DeadInst = NowDeadInsts.pop_back_val();
1609  ++NumFastOther;
1610 
1611  // Try to preserve debug information attached to the dead instruction.
1612  salvageDebugInfo(*DeadInst);
1613  salvageKnowledge(DeadInst);
1614 
1615  // Remove the Instruction from MSSA.
1616  if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
1617  if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
1618  SkipStores.insert(MD);
1619  if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
1620  if (SI->getValueOperand()->getType()->isPointerTy()) {
1621  const Value *UO = getUnderlyingObject(SI->getValueOperand());
1622  if (CapturedBeforeReturn.erase(UO))
1623  ShouldIterateEndOfFunctionDSE = true;
1624  InvisibleToCallerAfterRet.erase(UO);
1625  }
1626  }
1627  }
1628 
1629  Updater.removeMemoryAccess(MA);
1630  }
1631 
1632  auto I = IOLs.find(DeadInst->getParent());
1633  if (I != IOLs.end())
1634  I->second.erase(DeadInst);
1635  // Remove its operands
1636  for (Use &O : DeadInst->operands())
1637  if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1638  O = nullptr;
1639  if (isInstructionTriviallyDead(OpI, &TLI))
1640  NowDeadInsts.push_back(OpI);
1641  }
1642 
1643  EI.removeInstruction(DeadInst);
1644  DeadInst->eraseFromParent();
1645  }
1646  }
1647 
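// Illustrative sketch (not from this file): how the operand worklist above
// cascades. The names %gep and %base are invented for the example.
//
//   %gep = getelementptr i32, ptr %base, i64 4
//   store i32 0, ptr %gep     ; erased as a dead store
//
// Once the store is erased, %gep has no remaining users, so
// isInstructionTriviallyDead returns true for it; it is pushed onto
// NowDeadInsts and erased as well, keeping the IR and MemorySSA in sync.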
1648  // Check for any extra throws between \p KillingI and \p DeadI that block
1649  // DSE. This only checks extra maythrows (those that aren't MemoryDef's).
1650  // MemoryDefs that may throw are handled during the walk from one def to the
1651  // next.
1652  bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1653  const Value *KillingUndObj) {
1654  // First see if we can ignore it by using the fact that KillingUndObj is
1655  // an alloca/alloca-like object that is not visible to the caller during
1656  // execution of the function.
1657  if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
1658  return false;
1659 
1660  if (KillingI->getParent() == DeadI->getParent())
1661  return ThrowingBlocks.count(KillingI->getParent());
1662  return !ThrowingBlocks.empty();
1663  }
1664 
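// Illustrative sketch (not from this file): why extra may-throw instructions
// matter. @g and @may_throw are invented names.
//
//   store i32 1, ptr @g        ; cannot be removed:
//   call void @may_throw()     ; if this unwinds, the caller may read @g
//   store i32 2, ptr @g
//
// For a non-escaping alloca the first store could still be deleted, because
// an unwinding caller cannot observe the local object.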
1665  // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1666  // instructions act as barriers:
1667  // * A memory instruction that may throw and \p KillingI accesses a non-stack
1668  // object.
1669  // * Atomic stores stronger than monotonic.
1670  bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
1671  // If DeadI may throw it acts as a barrier, unless the access is to an
1672  // alloca/alloca-like object that does not escape.
1673  if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
1674  return true;
1675 
1676  // If DeadI is an atomic load/store stronger than monotonic, do not try to
1677  // eliminate/reorder it.
1678  if (DeadI->isAtomic()) {
1679  if (auto *LI = dyn_cast<LoadInst>(DeadI))
1680  return isStrongerThanMonotonic(LI->getOrdering());
1681  if (auto *SI = dyn_cast<StoreInst>(DeadI))
1682  return isStrongerThanMonotonic(SI->getOrdering());
1683  if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
1684  return isStrongerThanMonotonic(ARMW->getOrdering());
1685  if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
1686  return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
1687  isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
1688  llvm_unreachable("other instructions should be skipped in MemorySSA");
1689  }
1690  return false;
1691  }
1692 
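// Illustrative sketch (not from this file): an ordering barrier, with %p an
// invented, potentially shared pointer.
//
//   store atomic i32 1, ptr %p release, align 4  ; DeadI: stronger than
//                                                ; monotonic => barrier
//   store i32 2, ptr %p                          ; KillingI
//
// Even though the later store overwrites the location, the release store is
// kept because other threads may rely on its ordering effects.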
1693  /// Eliminate writes to objects that are not visible in the caller and are not
1694  /// accessed before returning from the function.
1695  bool eliminateDeadWritesAtEndOfFunction() {
1696  bool MadeChange = false;
1697  LLVM_DEBUG(
1698  dbgs()
1699  << "Trying to eliminate MemoryDefs at the end of the function\n");
1700  do {
1701  ShouldIterateEndOfFunctionDSE = false;
1702  for (MemoryDef *Def : llvm::reverse(MemDefs)) {
1703  if (SkipStores.contains(Def))
1704  continue;
1705 
1706  Instruction *DefI = Def->getMemoryInst();
1707  auto DefLoc = getLocForWrite(DefI);
1708  if (!DefLoc || !isRemovable(DefI))
1709  continue;
1710 
1711  // NOTE: Currently eliminating writes at the end of a function is
1712  // limited to MemoryDefs with a single underlying object, to save
1713  // compile-time. In practice it appears the case with multiple
1714  // underlying objects is very uncommon. If it turns out to be important,
1715  // we can use getUnderlyingObjects here instead.
1716  const Value *UO = getUnderlyingObject(DefLoc->Ptr);
1717  if (!isInvisibleToCallerAfterRet(UO))
1718  continue;
1719 
1720  if (isWriteAtEndOfFunction(Def)) {
1721  // See through pointer-to-pointer bitcasts
1722  LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
1723  "of the function\n");
1724  deleteDeadInstruction(DefI);
1725  ++NumFastStores;
1726  MadeChange = true;
1727  }
1728  }
1729  } while (ShouldIterateEndOfFunctionDSE);
1730  return MadeChange;
1731  }
1732 
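// Illustrative sketch (not from this file): a write that is dead at the end
// of the function. @sum and %buf are invented names.
//
//   define i32 @sum() {
//     %buf = alloca [4 x i32]
//     store i32 7, ptr %buf    ; never read before the function returns
//     ret i32 0
//   }
//
// %buf is invisible to the caller after the return, so the store is removed
// by the loop above.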
1733  /// If we have a zero initializing memset following a call to malloc,
1734  /// try folding it into a call to calloc.
1735  bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
1736  Instruction *DefI = Def->getMemoryInst();
1737  MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1738  if (!MemSet)
1739  // TODO: Could handle zero store to small allocation as well.
1740  return false;
1741  Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1742  if (!StoredConstant || !StoredConstant->isNullValue())
1743  return false;
1744 
1745  if (!isRemovable(DefI))
1746  // The memset might be volatile.
1747  return false;
1748 
1749  if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
1750  F.hasFnAttribute(Attribute::SanitizeAddress) ||
1751  F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
1752  F.getName() == "calloc")
1753  return false;
1754  auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
1755  if (!Malloc)
1756  return false;
1757  auto *InnerCallee = Malloc->getCalledFunction();
1758  if (!InnerCallee)
1759  return false;
1760  LibFunc Func;
1761  if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
1762  Func != LibFunc_malloc)
1763  return false;
1764 
1765  auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
1766  // Check for a br(icmp(ptr, null), truebb, falsebb) pattern at the end
1767  // of the malloc block.
1768  auto *MallocBB = Malloc->getParent(),
1769  *MemsetBB = Memset->getParent();
1770  if (MallocBB == MemsetBB)
1771  return true;
1772  auto *Ptr = Memset->getArgOperand(0);
1773  auto *TI = MallocBB->getTerminator();
1774  ICmpInst::Predicate Pred;
1775  BasicBlock *TrueBB, *FalseBB;
1776  if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
1777  FalseBB)))
1778  return false;
1779  if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB)
1780  return false;
1781  return true;
1782  };
1783 
1784  if (Malloc->getOperand(0) != MemSet->getLength())
1785  return false;
1786  if (!shouldCreateCalloc(Malloc, MemSet) ||
1787  !DT.dominates(Malloc, MemSet) ||
1788  !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
1789  return false;
1790  IRBuilder<> IRB(Malloc);
1791  const auto &DL = Malloc->getModule()->getDataLayout();
1792  auto *Calloc =
1793  emitCalloc(ConstantInt::get(IRB.getIntPtrTy(DL), 1),
1794  Malloc->getArgOperand(0), IRB, TLI);
1795  if (!Calloc)
1796  return false;
1797  MemorySSAUpdater Updater(&MSSA);
1798  auto *LastDef =
1799  cast<MemoryDef>(Updater.getMemorySSA()->getMemoryAccess(Malloc));
1800  auto *NewAccess =
1801  Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), LastDef,
1802  LastDef);
1803  auto *NewAccessMD = cast<MemoryDef>(NewAccess);
1804  Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
1805  Updater.removeMemoryAccess(Malloc);
1806  Malloc->replaceAllUsesWith(Calloc);
1807  Malloc->eraseFromParent();
1808  return true;
1809  }
1810 
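// Illustrative sketch (not from this file) of the malloc+memset fold, using
// invented operand names. Roughly:
//
//   %p = call ptr @malloc(i64 %n)
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
//
// becomes
//
//   %p = call ptr @calloc(i64 1, i64 %n)
//
// provided the memset length equals the allocation size, the malloc
// dominates the memset, and nothing may modify the buffer in between.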
1811  /// \returns true if \p Def is a no-op store, either because it
1812  /// directly stores back a loaded value or stores zero to a calloced object.
1813  bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
1814  Instruction *DefI = Def->getMemoryInst();
1815  StoreInst *Store = dyn_cast<StoreInst>(DefI);
1816  MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1817  Constant *StoredConstant = nullptr;
1818  if (Store)
1819  StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
1820  else if (MemSet)
1821  StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1822  else
1823  return false;
1824 
1825  if (!isRemovable(DefI))
1826  return false;
1827 
1828  if (StoredConstant) {
1829  Constant *InitC =
1830  getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
1831  // If the clobbering access is LiveOnEntry, no instructions between them
1832  // can modify the memory location.
1833  if (InitC && InitC == StoredConstant)
1834  return MSSA.isLiveOnEntryDef(
1835  MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def));
1836  }
1837 
1838  if (!Store)
1839  return false;
1840 
1841  if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
1842  if (LoadI->getPointerOperand() == Store->getOperand(1)) {
1843  // Get the defining access for the load.
1844  auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
1845  // Fast path: the defining accesses are the same.
1846  if (LoadAccess == Def->getDefiningAccess())
1847  return true;
1848 
1849  // Look through phi accesses. Recursively scan all phi accesses by
1850  // adding them to a worklist. Bail when we run into a memory def that
1851  // does not match LoadAccess.
1852  SetVector<MemoryAccess *> ToCheck;
1853  MemoryAccess *Current =
1854  MSSA.getWalker()->getClobberingMemoryAccess(Def);
1855  // We don't want to bail when we run into the store memory def. But,
1856  // the phi access may point to it. So, pretend like we've already
1857  // checked it.
1858  ToCheck.insert(Def);
1859  ToCheck.insert(Current);
1860  // Start at current (1) to simulate already having checked Def.
1861  for (unsigned I = 1; I < ToCheck.size(); ++I) {
1862  Current = ToCheck[I];
1863  if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
1864  // Check all the operands.
1865  for (auto &Use : PhiAccess->incoming_values())
1866  ToCheck.insert(cast<MemoryAccess>(&Use));
1867  continue;
1868  }
1869 
1870  // If we found a memory def, bail. This happens when we have an
1871  // unrelated write in between an otherwise noop store.
1872  assert(isa<MemoryDef>(Current) &&
1873  "Only MemoryDefs should reach here.");
1874  // TODO: Skip no alias MemoryDefs that have no aliasing reads.
1875  // We are searching for the definition of the store's destination.
1876  // So, if that is the same definition as the load, then this is a
1877  // noop. Otherwise, fail.
1878  if (LoadAccess != Current) {
1879  // This is a potentially clobbering store, but it writes the same
1880  // value, so we can safely ignore it if alignment is as expected.
1881  if (auto *CurrentDef = cast<MemoryDef>(Current))
1882  if (auto *CurrentStoreI =
1883  dyn_cast_or_null<StoreInst>(CurrentDef->getMemoryInst()))
1884  // Check alignment to ensure load or store does not access at an
1885  // offset.
1886  if (CurrentStoreI->getValueOperand() == LoadI) {
1887  TypeSize StoreSize = DL.getTypeStoreSize(LoadI->getType());
1888  if (!StoreSize.isScalable() &&
1889  std::min(CurrentStoreI->getAlign(), LoadI->getAlign()) >=
1890  StoreSize)
1891  continue;
1892  }
1893  return false;
1894  }
1895  }
1896  return true;
1897  }
1898  }
1899 
1900  return false;
1901  }
1902 
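// Illustrative sketch (not from this file): the store-back-of-a-load case
// that storeIsNoop detects. %p is an invented pointer.
//
//   %v = load i32, ptr %p
//   ; ... code that does not clobber %p ...
//   store i32 %v, ptr %p    ; no-op: the location already holds %v
//
// The store is removed when the load's defining access reaches the store
// unchanged, possibly through MemoryPhis, as walked above.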
1903  bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
1904  bool Changed = false;
1905  for (auto OI : IOL) {
1906  Instruction *DeadI = OI.first;
1907  MemoryLocation Loc = *getLocForWrite(DeadI);
1908  assert(isRemovable(DeadI) && "Expect only removable instruction");
1909 
1910  const Value *Ptr = Loc.Ptr->stripPointerCasts();
1911  int64_t DeadStart = 0;
1912  uint64_t DeadSize = Loc.Size.getValue();
1914  OverlapIntervalsTy &IntervalMap = OI.second;
1915  Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
1916  if (IntervalMap.empty())
1917  continue;
1918  Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
1919  }
1920  return Changed;
1921  }
1922 
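// Illustrative sketch (not from this file): shortening a partially
// overwritten memset, with invented operands.
//
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 32, i1 false)
//   store i64 1, ptr %p      ; later store overwrites the first 8 bytes
//
// The overlap intervals recorded for the memset allow tryToShortenBegin to
// rewrite it so that it only initializes the remaining 24 bytes.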
1923  /// Eliminates writes to locations where the value that is being written
1924  /// is already stored at the same location.
1925  bool eliminateRedundantStoresOfExistingValues() {
1926  bool MadeChange = false;
1927  LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
1928  "already existing value\n");
1929  for (auto *Def : MemDefs) {
1930  if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
1931  continue;
1932 
1933  Instruction *DefInst = Def->getMemoryInst();
1934  auto MaybeDefLoc = getLocForWrite(DefInst);
1935  if (!MaybeDefLoc || !isRemovable(DefInst))
1936  continue;
1937 
1938  MemoryDef *UpperDef;
1939  // To conserve compile-time, we avoid walking to the next clobbering def.
1940  // Instead, we just try to get the optimized access, if it exists. DSE
1941  // will try to optimize defs during the earlier traversal.
1942  if (Def->isOptimized())
1943  UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
1944  else
1945  UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
1946  if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
1947  continue;
1948 
1949  Instruction *UpperInst = UpperDef->getMemoryInst();
1950  auto IsRedundantStore = [&]() {
1951  if (DefInst->isIdenticalTo(UpperInst))
1952  return true;
1953  if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
1954  if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
1955  // MemSetInst must have a write location.
1956  MemoryLocation UpperLoc = *getLocForWrite(UpperInst);
1957  int64_t InstWriteOffset = 0;
1958  int64_t DepWriteOffset = 0;
1959  auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
1960  InstWriteOffset, DepWriteOffset);
1961  Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
1962  return StoredByte && StoredByte == MemSetI->getOperand(1) &&
1963  OR == OW_Complete;
1964  }
1965  }
1966  return false;
1967  };
1968 
1969  if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
1970  continue;
1971  LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
1972  << '\n');
1973  deleteDeadInstruction(DefInst);
1974  NumRedundantStores++;
1975  MadeChange = true;
1976  }
1977  return MadeChange;
1978  }
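// Illustrative sketch (not from this file): a store made redundant by an
// earlier memset of the same byte pattern, with invented operands.
//
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 false)
//   store i32 0, ptr %p      ; redundant: the bytes are already zero
//
// isBytewiseValue shows the stored value repeats the memset byte and the
// memset completely covers the store's location, so the store is deleted.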
1979 };
1980 
1981 static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
1982  DominatorTree &DT, PostDominatorTree &PDT,
1983  AssumptionCache &AC,
1984  const TargetLibraryInfo &TLI,
1985  const LoopInfo &LI) {
1986  bool MadeChange = false;
1987 
1988  MSSA.ensureOptimizedUses();
1989  DSEState State(F, AA, MSSA, DT, PDT, AC, TLI, LI);
1990  // For each store:
1991  for (unsigned I = 0; I < State.MemDefs.size(); I++) {
1992  MemoryDef *KillingDef = State.MemDefs[I];
1993  if (State.SkipStores.count(KillingDef))
1994  continue;
1995  Instruction *KillingI = KillingDef->getMemoryInst();
1996 
1997  Optional<MemoryLocation> MaybeKillingLoc;
1998  if (State.isMemTerminatorInst(KillingI))
1999  MaybeKillingLoc = State.getLocForTerminator(KillingI).transform(
2000  [](const std::pair<MemoryLocation, bool> &P) { return P.first; });
2001  else
2002  MaybeKillingLoc = State.getLocForWrite(KillingI);
2003 
2004  if (!MaybeKillingLoc) {
2005  LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2006  << *KillingI << "\n");
2007  continue;
2008  }
2009  MemoryLocation KillingLoc = *MaybeKillingLoc;
2010  assert(KillingLoc.Ptr && "KillingLoc should not be null");
2011  const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
2012  LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2013  << *KillingDef << " (" << *KillingI << ")\n");
2014 
2015  unsigned ScanLimit = MemorySSAScanLimit;
2016  unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2017  unsigned PartialLimit = MemorySSAPartialStoreLimit;
2018  // Worklist of MemoryAccesses that may be killed by KillingDef.
2019  SetVector<MemoryAccess *> ToCheck;
2020  ToCheck.insert(KillingDef->getDefiningAccess());
2021 
2022  bool Shortend = false;
2023  bool IsMemTerm = State.isMemTerminatorInst(KillingI);
2024  // Check if MemoryAccesses in the worklist are killed by KillingDef.
2025  for (unsigned I = 0; I < ToCheck.size(); I++) {
2026  MemoryAccess *Current = ToCheck[I];
2027  if (State.SkipStores.count(Current))
2028  continue;
2029 
2030  Optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
2031  KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
2032  WalkerStepLimit, IsMemTerm, PartialLimit);
2033 
2034  if (!MaybeDeadAccess) {
2035  LLVM_DEBUG(dbgs() << " finished walk\n");
2036  continue;
2037  }
2038 
2039  MemoryAccess *DeadAccess = *MaybeDeadAccess;
2040  LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2041  if (isa<MemoryPhi>(DeadAccess)) {
2042  LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2043  for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
2044  MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
2045  BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2046  BasicBlock *PhiBlock = DeadAccess->getBlock();
2047 
2048  // We only consider incoming MemoryAccesses that come before the
2049  // MemoryPhi. Otherwise we could discover candidates that do not
2050  // strictly dominate our starting def.
2051  if (State.PostOrderNumbers[IncomingBlock] >
2052  State.PostOrderNumbers[PhiBlock])
2053  ToCheck.insert(IncomingAccess);
2054  }
2055  continue;
2056  }
2057  auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
2058  Instruction *DeadI = DeadDefAccess->getMemoryInst();
2059  LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
2060  ToCheck.insert(DeadDefAccess->getDefiningAccess());
2061  NumGetDomMemoryDefPassed++;
2062 
2063  if (!DebugCounter::shouldExecute(MemorySSACounter))
2064  continue;
2065 
2066  MemoryLocation DeadLoc = *State.getLocForWrite(DeadI);
2067 
2068  if (IsMemTerm) {
2069  const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
2070  if (KillingUndObj != DeadUndObj)
2071  continue;
2072  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2073  << "\n KILLER: " << *KillingI << '\n');
2074  State.deleteDeadInstruction(DeadI);
2075  ++NumFastStores;
2076  MadeChange = true;
2077  } else {
2078  // Check if DeadI overwrites KillingI.
2079  int64_t KillingOffset = 0;
2080  int64_t DeadOffset = 0;
2081  OverwriteResult OR = State.isOverwrite(
2082  KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
2083  if (OR == OW_MaybePartial) {
2084  auto Iter = State.IOLs.insert(
2085  std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
2086  DeadI->getParent(), InstOverlapIntervalsTy()));
2087  auto &IOL = Iter.first->second;
2088  OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
2089  DeadOffset, DeadI, IOL);
2090  }
2091 
2092  if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2093  auto *DeadSI = dyn_cast<StoreInst>(DeadI);
2094  auto *KillingSI = dyn_cast<StoreInst>(KillingI);
2095  // We are re-using tryToMergePartialOverlappingStores, which requires
2096  // DeadSI to dominate KillingSI.
2097  // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
2098  if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
2099  if (Constant *Merged = tryToMergePartialOverlappingStores(
2100  KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
2101  State.BatchAA, &DT)) {
2102 
2103  // Update stored value of earlier store to merged constant.
2104  DeadSI->setOperand(0, Merged);
2105  ++NumModifiedStores;
2106  MadeChange = true;
2107 
2108  Shortend = true;
2109  // Remove killing store and remove any outstanding overlap
2110  // intervals for the updated store.
2111  State.deleteDeadInstruction(KillingSI);
2112  auto I = State.IOLs.find(DeadSI->getParent());
2113  if (I != State.IOLs.end())
2114  I->second.erase(DeadSI);
2115  break;
2116  }
2117  }
2118  }
2119 
2120  if (OR == OW_Complete) {
2121  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2122  << "\n KILLER: " << *KillingI << '\n');
2123  State.deleteDeadInstruction(DeadI);
2124  ++NumFastStores;
2125  MadeChange = true;
2126  }
2127  }
2128  }
2129 
2130  // Check if the store is a no-op.
2131  if (!Shortend && State.storeIsNoop(KillingDef, KillingUndObj)) {
2132  LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
2133  << '\n');
2134  State.deleteDeadInstruction(KillingI);
2135  NumRedundantStores++;
2136  MadeChange = true;
2137  continue;
2138  }
2139 
2140  // Can we form a calloc from a memset/malloc pair?
2141  if (!Shortend && State.tryFoldIntoCalloc(KillingDef, KillingUndObj)) {
2142  LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2143  << " DEAD: " << *KillingI << '\n');
2144  State.deleteDeadInstruction(KillingI);
2145  MadeChange = true;
2146  continue;
2147  }
2148  }
2149 
2150  if (EnablePartialOverwriteTracking)
2151  for (auto &KV : State.IOLs)
2152  MadeChange |= State.removePartiallyOverlappedStores(KV.second);
2153 
2154  MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2155  MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2156  return MadeChange;
2157 }
2158 } // end anonymous namespace
2159 
2160 //===----------------------------------------------------------------------===//
2161 // DSE Pass
2162 //===----------------------------------------------------------------------===//
2163 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2164  AliasAnalysis &AA = AM.getResult<AAManager>(F);
2165  const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2166  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
2167  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2168  PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
2169  AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
2170  LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
2171 
2172  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, AC, TLI, LI);
2173 
2174 #ifdef LLVM_ENABLE_STATS
2175  if (AreStatisticsEnabled())
2176  for (auto &I : instructions(F))
2177  NumRemainingStores += isa<StoreInst>(&I);
2178 #endif
2179 
2180  if (!Changed)
2181  return PreservedAnalyses::all();
2182 
2183  PreservedAnalyses PA;
2184  PA.preserveSet<CFGAnalyses>();
2185  PA.preserve<MemorySSAAnalysis>();
2186  PA.preserve<LoopAnalysis>();
2187  return PA;
2188 }
2189 
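// A typical way to exercise the pass in isolation with the new pass manager
// (sketch; the file names are placeholders):
//
//   opt -passes=dse -S input.ll -o output.ll
//
// In a build with statistics enabled, counters such as NumFastStores and
// NumRedundantStores can be printed by adding -stats to the invocation.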
2190 namespace {
2191 
2192 /// A legacy pass for the legacy pass manager that wraps \c DSEPass.
2193 class DSELegacyPass : public FunctionPass {
2194 public:
2195  static char ID; // Pass identification, replacement for typeid
2196 
2197  DSELegacyPass() : FunctionPass(ID) {
2198  initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
2199  }
2200 
2201  bool runOnFunction(Function &F) override {
2202  if (skipFunction(F))
2203  return false;
2204 
2205  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2206  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2207  const TargetLibraryInfo &TLI =
2208  getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2209  MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
2210  PostDominatorTree &PDT =
2211  getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
2212  AssumptionCache &AC =
2213  getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
2214  LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2215 
2216  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, AC, TLI, LI);
2217 
2218 #ifdef LLVM_ENABLE_STATS
2219  if (AreStatisticsEnabled())
2220  for (auto &I : instructions(F))
2221  NumRemainingStores += isa<StoreInst>(&I);
2222 #endif
2223 
2224  return Changed;
2225  }
2226 
2227  void getAnalysisUsage(AnalysisUsage &AU) const override {
2228  AU.setPreservesCFG();
2241  }
2242 };
2243 
2244 } // end anonymous namespace
2245 
2246 char DSELegacyPass::ID = 0;
2247 
2248 INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
2249  false)
2260  false)
2261 
2262 FunctionPass *llvm::createDeadStoreEliminationPass() {
2263  return new DSELegacyPass();
2264 }
dse
dse
Definition: DeadStoreElimination.cpp:2259
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::BatchAAResults
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
Definition: AliasAnalysis.h:908
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
AssumptionCache.h
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1260
getPointerSize
static uint64_t getPointerSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, const Function *F)
Definition: DeadStoreElimination.cpp:209
llvm::isAligned
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:146
llvm::EarliestEscapeInfo::removeInstruction
void removeInstruction(Instruction *I)
Definition: BasicAliasAnalysis.cpp:228
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
llvm::AliasResult::PartialAlias
@ PartialAlias
The two locations alias, but only due to a partial overlap.
Definition: AliasAnalysis.h:105
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:35
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:468
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:740
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:268
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:667
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:291
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::User::operands
op_range operands()
Definition: User.h:242
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::PHITransAddr
PHITransAddr - An address value which tracks and handles phi translation.
Definition: PHITransAddr.h:35
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
llvm::MemoryLocation::Ptr
const Value * Ptr
The address of the start of the location.
Definition: MemoryLocation.h:218
Scalar.h
MallocFamily::Malloc
@ Malloc
InstIterator.h
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::emitCalloc
Value * emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, const TargetLibraryInfo &TLI)
Emit a call to the calloc function.
Definition: BuildLibCalls.cpp:1880
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1181
Statistic.h
CaptureTracking.h
llvm::EarliestEscapeInfo
Context-sensitive CaptureInfo provider, which computes and caches the earliest common dominator closu...
Definition: AliasAnalysis.h:409
ErrorHandling.h
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:710
llvm::IRBuilder<>
MapVector.h
llvm::isModSet
bool isModSet(const ModRefInfo MRI)
Definition: AliasAnalysis.h:168
DeadStoreElimination.h
ValueTracking.h
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
GlobalsModRef.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::PatternMatch::m_Br
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
Definition: PatternMatch.h:1731
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:302
llvm::APInt::getBitsSet
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:241
APInt.h
MemorySSASameBBStepCost
static cl::opt< unsigned > MemorySSASameBBStepCost("dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden, cl::desc("The cost of a step in the same basic block as the killing MemoryDef" "(default = 1)"))
llvm::MemoryLocation::Size
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
Definition: MemoryLocation.h:227
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1411
Module.h
MemoryBuiltins.h
llvm::ObjectSizeOpts::NullIsUnknownSize
bool NullIsUnknownSize
If this is true, null pointers in address space 0 will be treated as though they can't be evaluated.
Definition: MemoryBuiltins.h:162
llvm::isBytewiseValue
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
Definition: ValueTracking.cpp:3937
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1290
llvm::createDeadStoreEliminationPass
FunctionPass * createDeadStoreEliminationPass()
Definition: DeadStoreElimination.cpp:2262
llvm::Optional
Definition: APInt.h:33
llvm::IntervalMap::empty
bool empty() const
empty - Return true when no intervals are mapped.
Definition: IntervalMap.h:1062
llvm::PHITransAddr::IsPotentiallyPHITranslatable
bool IsPotentiallyPHITranslatable() const
IsPotentiallyPHITranslatable - If this needs PHI translation, return true if we have some hope of doi...
Definition: PHITransAddr.cpp:114
llvm::AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:82
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::Instruction::mayThrow
bool mayThrow() const
Return true if this instruction may throw an exception.
Definition: Instruction.cpp:711
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
tryToShortenBegin
static bool tryToShortenBegin(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
Definition: DeadStoreElimination.cpp:612
llvm::LocationSize::isPrecise
bool isPrecise() const
Definition: MemoryLocation.h:166
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
llvm::BatchAAResults::getModRefInfo
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
Definition: AliasAnalysis.h:923
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:298
MustExecute.h
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:265
llvm::MemSetBase::getValue
Value * getValue() const
Definition: IntrinsicInst.h:853
llvm::AtomicOrdering::Monotonic
@ Monotonic
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
AliasAnalysis.h
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
Instruction.h
CommandLine.h
CodeMetrics.h
MemorySSAScanLimit
static cl::opt< unsigned > MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 150)"))
llvm::StoreInst::getValueOperand
Value * getValueOperand()
Definition: Instructions.h:387
llvm::AreStatisticsEnabled
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:139
llvm::isNoAliasCall
bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
Definition: AliasAnalysis.cpp:944
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:984
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AAResults
Definition: AliasAnalysis.h:518
PostDominators.h
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:76
llvm::MemorySSA::isLiveOnEntryDef
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:737
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
OverlapIntervalsTy
std::map< int64_t, int64_t > OverlapIntervalsTy
Definition: DeadStoreElimination.cpp:173
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:35
InstrTypes.h
llvm::PostDominatorTreeWrapperPass
Definition: PostDominators.h:73
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LocationSize::getValue
uint64_t getValue() const
Definition: MemoryLocation.h:159
llvm::AliasResult::hasOffset
constexpr bool hasOffset() const
Definition: AliasAnalysis.h:118
TargetLibraryInfo.h
AssumeBundleBuilder.h
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:376
false
Definition: StackSlotColoring.cpp:141
llvm::VectorType::getElementCount
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:627
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:294
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:147
llvm::Instruction
Definition: Instruction.h:42
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::DebugCounter::shouldExecute
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:74
Elimination
Dead Store Elimination
Definition: DeadStoreElimination.cpp:2259
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::salvageDebugInfo
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1360
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
llvm::getUnderlyingObject
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
Definition: ValueTracking.cpp:4496
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
SmallPtrSet.h
llvm::DominatorTreeBase::roots
iterator_range< root_iterator > roots()
Definition: GenericDomTree.h:304
PatternMatch.h
llvm::MemoryUseOrDef::getMemoryInst
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
Definition: MemorySSA.h:259
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::Instruction::mayWriteToMemory
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
Definition: Instruction.cpp:622
llvm::getObjectSize
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
Definition: MemoryBuiltins.cpp:614
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false, false) INITIALIZE_PASS_END(DSELegacyPass
llvm::None
const NoneType None
Definition: None.h:24
llvm::MemoryAccess::getBlock
BasicBlock * getBlock() const
Definition: MemorySSA.h:164
memoryIsNotModifiedBetween
static bool memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, BatchAAResults &AA, const DataLayout &DL, DominatorTree *DT)
Returns true if the memory which is accessed by the second instruction is not modified between the fi...
Definition: DeadStoreElimination.cpp:399
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
isShortenableAtTheBeginning
static bool isShortenableAtTheBeginning(Instruction *I)
Returns true if the beginning of this instruction can be safely shortened in length.
Definition: DeadStoreElimination.cpp:203
llvm::MemSetInst
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
Definition: IntrinsicInst.h:993
llvm::AliasResult::getOffset
constexpr int32_t getOffset() const
Definition: AliasAnalysis.h:119
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::getFreedOperand
Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
Definition: MemoryBuiltins.cpp:582
llvm::SmallPtrSetImplBase::empty
bool empty() const
Definition: SmallPtrSet.h:92
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::MemorySSAWalker::getClobberingMemoryAccess
MemoryAccess * getClobberingMemoryAccess(const Instruction *I)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1044
BasicBlock.h
llvm::cl::opt< bool >
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:264
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:537
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:298
llvm::getInitialValueOfAllocation
Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
Definition: MemoryBuiltins.cpp:459
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
llvm::MemoryLocation::getOrNone
static Optional< MemoryLocation > getOrNone(const Instruction *Inst)
Definition: MemoryLocation.cpp:76
llvm::BatchAAResults::alias
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
Definition: AliasAnalysis.h:917
llvm::MapVector::find
iterator find(const KeyT &Key)
Definition: MapVector.h:148
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:468
uint64_t
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:54
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:78
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
llvm::Instruction::isIdenticalTo
bool isIdenticalTo(const Instruction *I) const
Return true if the specified instruction is exactly identical to the current one.
Definition: Instruction.cpp:528
MemoryLocation.h
llvm::DenseMap
Definition: DenseMap.h:714
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:700
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:989
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MemorySSA::getMemoryAccess
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:717
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:403
llvm::MemoryDef
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:372
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:351
EnablePartialStoreMerging
static cl::opt< bool > EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE"))
llvm::GetElementPtrInst::CreateInBounds
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Definition: Instructions.h:982
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:335
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
llvm::TargetLibraryInfo::has
bool has(LibFunc F) const
Tests whether a library function is available.
Definition: TargetLibraryInfo.h:328
MemorySSAOtherBBStepCost
static cl::opt< unsigned > MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5), cl::Hidden, cl::desc("The cost of a step in a different basic " "block than the killing MemoryDef" "(default = 5)"))
isMaskedStoreOverwrite
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, const Instruction *DeadI, BatchAAResults &AA)
Check if two instruction are masked stores that completely overwrite one another.
Definition: DeadStoreElimination.cpp:238
llvm::salvageKnowledge
void salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
Definition: AssumeBundleBuilder.cpp:293
llvm::PHITransAddr::PHITranslateValue
bool PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
PHITranslateValue - PHI translate the current address up the CFG from CurBB to Pred,...
Definition: PHITransAddr.cpp:314
EnablePartialOverwriteTracking
static cl::opt< bool > EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden, cl::desc("Enable partial-overwrite tracking in DSE"))
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:934
llvm::MemorySSA::getWalker
MemorySSAWalker * getWalker()
Definition: MemorySSA.cpp:1559
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
MemorySSAPathCheckLimit
static cl::opt< unsigned > MemorySSAPathCheckLimit("dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, cl::desc("The maximum number of blocks to check when trying to prove that " "all paths to an exit go through a killing block (default = 50)"))
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:395
llvm::LoopInfo
Definition: LoopInfo.h:1105
tryToMergePartialOverlappingStores
static Constant * tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, int64_t KillingOffset, int64_t DeadOffset, const DataLayout &DL, BatchAAResults &AA, DominatorTree *DT)
Definition: DeadStoreElimination.cpp:642
llvm::PointerMayBeCaptured
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
Definition: CaptureTracking.cpp:229
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::IntervalMap::end
const_iterator end() const
Definition: IntervalMap.h:1119
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1597
DataLayout.h
llvm::MemorySSA::getSkipSelfWalker
MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1574
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::PostDominatorTree
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Definition: PostDominators.h:28
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::isNotVisibleOnUnwind
bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
Definition: AliasAnalysis.cpp:994
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:113
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:838
uint32_t
llvm::AliasResult::NoAlias
@ NoAlias
The two locations do not alias at all.
Definition: AliasAnalysis.h:100
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::isRefSet
bool isRefSet(const ModRefInfo MRI)
Definition: AliasAnalysis.h:171
llvm::AliasResult::MustAlias
@ MustAlias
The two locations precisely alias each other.
Definition: AliasAnalysis.h:107
CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79
llvm::ObjectSizeOpts
Various options to control the behavior of getObjectSize.
Definition: MemoryBuiltins.h:138
llvm::DominatorTreeBase::properlyDominates
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Definition: GenericDomTree.h:392
llvm::Instruction::mayReadFromMemory
bool mayReadFromMemory() const
Return true if this instruction may read memory.
Definition: Instruction.cpp:602
llvm::MemoryAccess
Definition: MemorySSA.h:142
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
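A minimal sketch of the insert() return value; getOrAssignId and Ids are hypothetical:
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Value.h"
using namespace llvm;
// DenseMap::insert() does not overwrite an existing mapping; the bool in the
// returned pair reports whether the key was newly inserted.
static unsigned getOrAssignId(DenseMap<const Value *, unsigned> &Ids,
                              const Value *V) {
  auto Res = Ids.insert({V, Ids.size()});
  return Res.first->second;
}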
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:685
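A one-line sketch of comparing pointers modulo casts; sameUnderlyingPointer is hypothetical:
#include "llvm/IR/Value.h"
using namespace llvm;
// Two pointers that differ only by bitcasts, all-zero GEPs or addrspacecasts
// strip to the same value.
static bool sameUnderlyingPointer(const Value *A, const Value *B) {
  return A->stripPointerCasts() == B->stripPointerCasts();
}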
llvm::MapVector::end
iterator end()
Definition: MapVector.h:72
Argument.h
llvm::APInt::zext
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:973
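A small arithmetic sketch; widenOffset is hypothetical and assumes the input is narrower than 64 bits:
#include "llvm/ADT/APInt.h"
using namespace llvm;
// Zero extension preserves the unsigned value, e.g. APInt(32, 7).zext(64)
// compares equal to APInt(64, 7).
static APInt widenOffset(const APInt &Off) {
  return Off.zext(64);
}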
llvm::MemorySSA::ensureOptimizedUses
void ensureOptimizedUses()
By default, uses are not optimized during MemorySSA construction.
Definition: MemorySSA.cpp:2141
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::Instruction::isAtomic
bool isAtomic() const
Return true if this instruction has an AtomicOrdering of unordered or higher.
Definition: Instruction.cpp:642
llvm::PHITransAddr::getAddr
Value * getAddr() const
Definition: PHITransAddr.h:59
Constant.h
llvm::BatchAAResults::isMustAlias
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
Definition: AliasAnalysis.h:942
tryToShorten
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd)
Definition: DeadStoreElimination.cpp:486
llvm::MemoryDependenceWrapperPass
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDependenceResults instance.
Definition: MemoryDependenceAnalysis.h:526
llvm::MemIntrinsicBase::getLength
Value * getLength() const
Definition: IntrinsicInst.h:735
llvm::MemoryUseOrDef::getDefiningAccess
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:262
llvm::MemoryDef::setOptimized
void setOptimized(MemoryAccess *MA)
Definition: MemorySSA.h:392
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::initializeDSELegacyPassPass
void initializeDSELegacyPassPass(PassRegistry &)
llvm::DSEPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Definition: DeadStoreElimination.cpp:2163
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::TypeSize
Definition: TypeSize.h:435
MemorySSADefsPerBlockLimit
static cl::opt< unsigned > MemorySSADefsPerBlockLimit("dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, cl::desc("The number of MemoryDefs we consider as candidates to eliminate " "other stores per basic block (default = 5000)"))
Casting.h
llvm::post_order
iterator_range< po_iterator< T > > post_order(const T &G)
Definition: PostOrderIterator.h:189
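A short traversal sketch; countReachableBlocks is hypothetical:
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/IR/Function.h"
using namespace llvm;
// post_order visits each reachable block after all of its successors have
// been visited, which suits bottom-up CFG analyses.
static unsigned countReachableBlocks(Function &F) {
  unsigned N = 0;
  for (BasicBlock *BB : post_order(&F)) {
    (void)BB;
    ++N;
  }
  return N;
}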
Function.h
OptimizeMemorySSA
static cl::opt< bool > OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden, cl::desc("Allow DSE to optimize memory accesses."))
DebugCounter.h
PassManager.h
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::GetPointerBaseWithConstantOffset
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
Definition: ValueTracking.h:272
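A hedged sketch of decomposing a pointer; baseAndByteOffset is hypothetical:
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
using namespace llvm;
// Walks constant GEPs and casts, accumulating the byte offset into Offset;
// returns the underlying base pointer (Ptr itself if nothing could be stripped).
static Value *baseAndByteOffset(Value *Ptr, int64_t &Offset,
                                const DataLayout &DL) {
  Offset = 0;
  return GetPointerBaseWithConstantOffset(Ptr, Offset, DL);
}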
llvm::MemoryLocation::getWithNewPtr
MemoryLocation getWithNewPtr(const Value *NewPtr) const
Definition: MemoryLocation.h:294
MemorySSAPartialStoreLimit
static cl::opt< unsigned > MemorySSAPartialStoreLimit("dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, cl::desc("The maximum number of candidates that only partially overwrite the " "killing MemoryDef to consider" " (default = 5)"))
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
llvm::isStrongerThan
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...
Definition: AtomicOrdering.h:90
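A hedged sketch relating a store's ordering to the lattice; hasOrderingConstraint is hypothetical:
#include "llvm/IR/Instructions.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace llvm;
// Anything stronger than monotonic (e.g. release or seq_cst) imposes an
// ordering constraint that dead-store reasoning must respect.
static bool hasOrderingConstraint(const StoreInst &SI) {
  return isStrongerThan(SI.getOrdering(), AtomicOrdering::Monotonic);
}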
AA
DEBUG_COUNTER
DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa", "Controls which MemoryDefs are eliminated.")
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
MemorySSA.h
Instructions.h
PostOrderIterator.h
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:188
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:215
llvm::Optional::transform
auto transform(const Function &F) const &-> Optional< decltype(F(value()))>
Apply a function to the value if present; otherwise return None.
Definition: Optional.h:345
SmallVector.h
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:772
tryToShortenEnd
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
Definition: DeadStoreElimination.cpp:585
llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:1394
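A combined sketch of the pattern matchers listed above; isCompareAgainst is hypothetical:
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
// Recognize "icmp <pred> %x, %Expected", capturing the predicate and the
// other operand; m_Specific pins one operand, m_Value captures the other.
static bool isCompareAgainst(Value *V, Value *Expected,
                             ICmpInst::Predicate &Pred, Value *&Other) {
  return match(V, m_ICmp(Pred, m_Value(Other), m_Specific(Expected)));
}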
Dominators.h
llvm::MemoryLocation::getAfter
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
Definition: MemoryLocation.h:270
llvm::IntervalMap::begin
const_iterator begin() const
Definition: IntervalMap.h:1107
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1308
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:659
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:144
llvm::isStrongerThanMonotonic
bool isStrongerThanMonotonic(AtomicOrdering AO)
Definition: AtomicOrdering.h:124
llvm::IntervalMap
Definition: IntervalMap.h:936
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:365
isPartialOverwrite
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, const MemoryLocation &DeadLoc, int64_t KillingOff, int64_t DeadOff, Instruction *DeadI, InstOverlapIntervalsTy &IOL)
Return 'OW_Complete' if a store to the 'KillingLoc' location completely overwrites a store to the 'De...
Definition: DeadStoreElimination.cpp:281
llvm::RegState::Dead
@ Dead
Unused definition.
Definition: MachineInstrBuilder.h:50
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::PostDominatorTreeAnalysis
Analysis pass which computes a PostDominatorTree.
Definition: PostDominators.h:47
deleteDeadInstruction
static void deleteDeadInstruction(Instruction *I)
Definition: LoopIdiomRecognize.cpp:347
llvm::CastInst::CreatePointerCast
static CastInst * CreatePointerCast(Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd)
Create a BitCast, AddrSpaceCast, or PtrToInt cast instruction.
Definition: Instructions.cpp:3342
llvm::MemoryLocation::getForDest
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
Definition: MemoryLocation.cpp:106
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
MemorySSAUpwardsStepLimit
static cl::opt< unsigned > MemorySSAUpwardsStepLimit("dse-memoryssa-walklimit", cl::init(90), cl::Hidden, cl::desc("The maximum number of steps while walking upwards to find " "MemoryDefs that may be killed (default = 90)"))
llvm::cl::desc
Definition: CommandLine.h:413
raw_ostream.h
llvm::NullPointerIsDefined
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:2049
llvm::PHITransAddr::NeedsPHITranslationFromBlock
bool NeedsPHITranslationFromBlock(BasicBlock *BB) const
NeedsPHITranslationFromBlock - Return true if moving from the specified BasicBlock to its predecessor...
Definition: PHITransAddr.h:63
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:389
llvm::PostDominatorTree::dominates
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
Definition: PostDominators.cpp:54
Value.h
InitializePasses.h
isShortenableAtTheEnd
static bool isShortenableAtTheEnd(Instruction *I)
Returns true if the end of this instruction can be safely shortened in length.
Definition: DeadStoreElimination.cpp:178
BuildLibCalls.h
llvm::offsetToAlignment
uint64_t offsetToAlignment(uint64_t Value, Align Alignment)
Returns the offset to the next integer (mod 2**64) that is greater than or equal to Value and is a mu...
Definition: Alignment.h:198
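A small arithmetic sketch; paddingTo16 is hypothetical:
#include "llvm/Support/Alignment.h"
#include <cstdint>
using namespace llvm;
// Padding needed to reach the next 16-byte boundary:
// offsetToAlignment(13, Align(16)) == 3 and offsetToAlignment(32, Align(16)) == 0.
static uint64_t paddingTo16(uint64_t Offset) {
  return offsetToAlignment(Offset, Align(16));
}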
llvm::mayContainIrreducibleControl
bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI)
Definition: MustExecute.cpp:480
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:443
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:210
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1265
SetVector.h
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
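A minimal sketch of the insert() pair result; firstVisit and Visited are hypothetical:
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;
// The bool in the returned pair is true exactly on the first visit, which
// makes insert() a compact visited-set check for worklist walks.
static bool firstVisit(SmallPtrSetImpl<Instruction *> &Visited, Instruction *I) {
  return Visited.insert(I).second;
}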
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38