DeadStoreElimination.cpp
1 //===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The code below implements dead store elimination using MemorySSA. It uses
10 // the following general approach: given a MemoryDef, walk upwards to find
11 // clobbering MemoryDefs that may be killed by the starting def. Then check
12 // that there are no uses that may read the location of the original MemoryDef
13 // in between both MemoryDefs. A bit more concretely:
14 //
15 // For all MemoryDefs StartDef:
16 // 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17 // upwards.
18 // 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19 // checking all uses starting at MaybeDeadAccess and walking until we see
20 // StartDef.
21 // 3. For each found CurrentDef, check that:
22 // 1. There are no barrier instructions between CurrentDef and StartDef (like
23 // throws or stores with ordering constraints).
24 // 2. StartDef is executed whenever CurrentDef is executed.
25 // 3. StartDef completely overwrites CurrentDef.
26 // 4. Erase CurrentDef from the function and MemorySSA.
27 //
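// As an illustration (a hypothetical IR sketch, not taken from a test):
//
//   store i32 0, i32* %p   ; found as MaybeDeadAccess when walking upwards
//   ...                    ; no reads of %p in between
//   store i32 1, i32* %p   ; StartDef, completely overwrites the first store
//
// The first store is dead and is erased from the function and from MemorySSA.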
28 //===----------------------------------------------------------------------===//
29 
31 #include "llvm/ADT/APInt.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/MapVector.h"
34 #include "llvm/ADT/PostOrderIterator.h"
35 #include "llvm/ADT/SetVector.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallVector.h"
38 #include "llvm/ADT/Statistic.h"
39 #include "llvm/ADT/StringRef.h"
45 #include "llvm/Analysis/LoopInfo.h"
46 #include "llvm/Analysis/MemoryBuiltins.h"
47 #include "llvm/Analysis/MemoryLocation.h"
48 #include "llvm/Analysis/MemorySSA.h"
49 #include "llvm/Analysis/MemorySSAUpdater.h"
50 #include "llvm/Analysis/MustExecute.h"
51 #include "llvm/Analysis/PostDominators.h"
52 #include "llvm/Analysis/TargetLibraryInfo.h"
53 #include "llvm/Analysis/ValueTracking.h"
54 #include "llvm/IR/Argument.h"
55 #include "llvm/IR/BasicBlock.h"
56 #include "llvm/IR/Constant.h"
57 #include "llvm/IR/Constants.h"
58 #include "llvm/IR/DataLayout.h"
59 #include "llvm/IR/Dominators.h"
60 #include "llvm/IR/Function.h"
61 #include "llvm/IR/IRBuilder.h"
62 #include "llvm/IR/InstIterator.h"
63 #include "llvm/IR/InstrTypes.h"
64 #include "llvm/IR/Instruction.h"
65 #include "llvm/IR/Instructions.h"
66 #include "llvm/IR/IntrinsicInst.h"
67 #include "llvm/IR/Module.h"
68 #include "llvm/IR/PassManager.h"
69 #include "llvm/IR/PatternMatch.h"
70 #include "llvm/IR/Value.h"
71 #include "llvm/InitializePasses.h"
72 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/CommandLine.h"
75 #include "llvm/Support/Debug.h"
76 #include "llvm/Support/DebugCounter.h"
79 #include "llvm/Transforms/Scalar.h"
83 #include <algorithm>
84 #include <cassert>
85 #include <cstdint>
86 #include <iterator>
87 #include <map>
88 #include <utility>
89 
90 using namespace llvm;
91 using namespace PatternMatch;
92 
93 #define DEBUG_TYPE "dse"
94 
95 STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
96 STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
97 STATISTIC(NumFastStores, "Number of stores deleted");
98 STATISTIC(NumFastOther, "Number of other instrs removed");
99 STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
100 STATISTIC(NumModifiedStores, "Number of stores modified");
101 STATISTIC(NumCFGChecks, "Number of CFG checks performed");
102 STATISTIC(NumCFGTries, "Number of CFG kill checks attempted");
103 STATISTIC(NumCFGSuccess, "Number of CFG kill checks that succeeded");
104 STATISTIC(NumGetDomMemoryDefPassed,
105  "Number of times a valid candidate is returned from getDomMemoryDef");
106 STATISTIC(NumDomMemDefChecks,
107  "Number of iterations checking for reads in getDomMemoryDef");
108 
109 DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
110  "Controls which MemoryDefs are eliminated.");
111 
112 static cl::opt<bool>
113 EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
114  cl::init(true), cl::Hidden,
115  cl::desc("Enable partial-overwrite tracking in DSE"));
116 
117 static cl::opt<bool>
118 EnablePartialStoreMerging("enable-dse-partial-store-merging",
119  cl::init(true), cl::Hidden,
120  cl::desc("Enable partial store merging in DSE"));
121 
122 static cl::opt<unsigned>
123  MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
124  cl::desc("The number of memory instructions to scan for "
125  "dead store elimination (default = 150)"));
126 static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
127  "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
128  cl::desc("The maximum number of steps while walking upwards to find "
129  "MemoryDefs that may be killed (default = 90)"));
130 
131 static cl::opt<unsigned> MemorySSAPartialStoreLimit(
132  "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
133  cl::desc("The maximum number of candidates that only partially overwrite "
134  "the killing MemoryDef to consider"
135  " (default = 5)"));
136 
137 static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
138  "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
139  cl::desc("The number of MemoryDefs we consider as candidates to eliminate "
140  "other stores per basic block (default = 5000)"));
141 
142 static cl::opt<unsigned> MemorySSASameBBStepCost(
143  "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
144  cl::desc(
145  "The cost of a step in the same basic block as the killing MemoryDef "
146  "(default = 1)"));
147 
148 static cl::opt<unsigned>
149  MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
150  cl::Hidden,
151  cl::desc("The cost of a step in a different basic "
152  "block than the killing MemoryDef "
153  "(default = 5)"));
154 
155 static cl::opt<unsigned> MemorySSAPathCheckLimit(
156  "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
157  cl::desc("The maximum number of blocks to check when trying to prove that "
158  "all paths to an exit go through a killing block (default = 50)"));
159 
160 // This flag allows or disallows DSE to optimize MemorySSA during its
161 // traversal. Note that DSE optimizing MemorySSA may impact other passes
162 // downstream of the DSE invocation and can lead to issues not being
163 // reproducible in isolation (i.e. when MemorySSA is built from scratch). In
164 // those cases, the flag can be used to check if DSE's MemorySSA optimizations
165 // impact follow-up passes.
166 static cl::opt<bool>
167  OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
168  cl::desc("Allow DSE to optimize memory accesses."));
169 
170 //===----------------------------------------------------------------------===//
171 // Helper functions
172 //===----------------------------------------------------------------------===//
173 using OverlapIntervalsTy = std::map<int64_t, int64_t>;
174 using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
175 
176 /// Returns true if the end of this instruction can be safely shortened in
177 /// length.
178 static bool isShortenableAtTheEnd(Instruction *I) {
179  // Don't shorten stores for now
180  if (isa<StoreInst>(I))
181  return false;
182 
183  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
184  switch (II->getIntrinsicID()) {
185  default: return false;
186  case Intrinsic::memset:
187  case Intrinsic::memcpy:
188  case Intrinsic::memcpy_element_unordered_atomic:
189  case Intrinsic::memset_element_unordered_atomic:
190  // Do shorten memory intrinsics.
191  // FIXME: Add memmove if it's also safe to transform.
192  return true;
193  }
194  }
195 
196  // Don't shorten library calls for now.
197 
198  return false;
199 }
200 
201 /// Returns true if the beginning of this instruction can be safely shortened
202 /// in length.
203 static bool isShortenableAtTheBeginning(Instruction *I) {
204  // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
205  // easily done by offsetting the source address.
206  return isa<AnyMemSetInst>(I);
207 }
208 
209 static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
210  const TargetLibraryInfo &TLI,
211  const Function *F) {
212  uint64_t Size;
213  ObjectSizeOpts Opts;
214  Opts.NullIsUnknownSize = NullPointerIsDefined(F);
215 
216  if (getObjectSize(V, Size, DL, &TLI, Opts))
217  return Size;
218  return MemoryLocation::UnknownSize;
219 }
220 
221 namespace {
222 
223 enum OverwriteResult {
224  OW_Begin,
225  OW_Complete,
226  OW_End,
227  OW_PartialEarlierWithFullLater,
228  OW_MaybePartial,
229  OW_None,
230  OW_Unknown
231 };
232 
233 } // end anonymous namespace
234 
235 /// Check if two instructions are masked stores that completely
236 /// overwrite one another. More specifically, \p KillingI has to
237 /// overwrite \p DeadI.
238 static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
239  const Instruction *DeadI,
240  BatchAAResults &AA) {
241  const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
242  const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
243  if (KillingII == nullptr || DeadII == nullptr)
244  return OW_Unknown;
245  if (KillingII->getIntrinsicID() != Intrinsic::masked_store ||
246  DeadII->getIntrinsicID() != Intrinsic::masked_store)
247  return OW_Unknown;
248  // Pointers.
249  Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
250  Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
251  if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
252  return OW_Unknown;
253  // Masks.
254  // TODO: check that KillingII's mask is a superset of the DeadII's mask.
255  if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
256  return OW_Unknown;
257  return OW_Complete;
258 }
259 
260 /// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
261 /// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
262 /// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
263 /// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
264 /// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
265 /// overwritten by a killing (smaller) store which doesn't write outside the big
266 /// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
267 /// NOTE: This function must only be called if both \p KillingLoc and \p
268 /// DeadLoc belong to the same underlying object with valid \p KillingOff and
269 /// \p DeadOff.
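/// For illustration (hypothetical byte ranges, with partial-overwrite tracking
/// disabled): a dead store to [0, 8) followed by a killing store to [4, 12)
/// yields OW_End; a killing store to [0, 4) over a dead store to [2, 10)
/// yields OW_Begin; a killing store to [2, 6) fully inside a dead store to
/// [0, 8) yields OW_PartialEarlierWithFullLater when partial store merging is
/// enabled.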
270 static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
271  const MemoryLocation &DeadLoc,
272  int64_t KillingOff, int64_t DeadOff,
273  Instruction *DeadI,
274  InstOverlapIntervalsTy &IOL) {
275  const uint64_t KillingSize = KillingLoc.Size.getValue();
276  const uint64_t DeadSize = DeadLoc.Size.getValue();
277  // We may now overlap, although the overlap is not complete. There might also
278  // be other incomplete overlaps, and together, they might cover the complete
279  // dead store.
280  // Note: The correctness of this logic depends on the fact that this function
281  // is not called for a dead store (DeadI) when there are any intervening reads.
282  if (EnablePartialOverwriteTracking &&
283  KillingOff < int64_t(DeadOff + DeadSize) &&
284  int64_t(KillingOff + KillingSize) >= DeadOff) {
285 
286  // Insert our part of the overlap into the map.
287  auto &IM = IOL[DeadI];
288  LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
289  << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
290  << KillingOff << ", " << int64_t(KillingOff + KillingSize)
291  << ")\n");
292 
293  // Make sure that we only insert non-overlapping intervals and combine
294  // adjacent intervals. The intervals are stored in the map with the ending
295  // offset as the key (in the half-open sense) and the starting offset as
296  // the value.
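 // For example (hypothetical offsets): after killing writes to [0, 4) and
 // [8, 12) the map holds {4 -> 0, 12 -> 8}; a further killing write to [4, 8)
 // merges all three entries into {12 -> 0}, which then completely covers a
 // dead store of [0, 12).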
297  int64_t KillingIntStart = KillingOff;
298  int64_t KillingIntEnd = KillingOff + KillingSize;
299 
300  // Find any intervals ending at, or after, KillingIntStart which start
301  // before KillingIntEnd.
302  auto ILI = IM.lower_bound(KillingIntStart);
303  if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
304  // This existing interval is overlapped with the current store somewhere
305  // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
306  // intervals and adjusting our start and end.
307  KillingIntStart = std::min(KillingIntStart, ILI->second);
308  KillingIntEnd = std::max(KillingIntEnd, ILI->first);
309  ILI = IM.erase(ILI);
310 
311  // Continue erasing and adjusting our end in case other previous
312  // intervals are also overlapped with the current store.
313  //
314  // |--- dead 1 ---| |--- dead 2 ---|
315  // |------- killing---------|
316  //
317  while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
318  assert(ILI->second > KillingIntStart && "Unexpected interval");
319  KillingIntEnd = std::max(KillingIntEnd, ILI->first);
320  ILI = IM.erase(ILI);
321  }
322  }
323 
324  IM[KillingIntEnd] = KillingIntStart;
325 
326  ILI = IM.begin();
327  if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
328  LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
329  << DeadOff << ", " << int64_t(DeadOff + DeadSize)
330  << ") Composite KillingLoc [" << ILI->second << ", "
331  << ILI->first << ")\n");
332  ++NumCompletePartials;
333  return OW_Complete;
334  }
335  }
336 
337  // Check for a dead store which writes to all the memory locations that
338  // the killing store writes to.
339  if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
340  int64_t(DeadOff + DeadSize) > KillingOff &&
341  uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
342  LLVM_DEBUG(dbgs() << "DSE: Partial overwrite of a dead store [" << DeadOff
343  << ", " << int64_t(DeadOff + DeadSize)
344  << ") by a killing store [" << KillingOff << ", "
345  << int64_t(KillingOff + KillingSize) << ")\n");
346  // TODO: Maybe come up with a better name?
347  return OW_PartialEarlierWithFullLater;
348  }
349 
350  // Another interesting case is if the killing store overwrites the end of the
351  // dead store.
352  //
353  //  |--dead--|
354  //            |-- killing --|
355  //
356  // In this case we may want to trim the size of the dead store to avoid
357  // generating stores to addresses which will definitely be overwritten by
358  // the killing store.
359  if (!EnablePartialOverwriteTracking &&
360  (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
361  int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
362  return OW_End;
363 
364  // Finally, we also need to check if the killing store overwrites the
365  // beginning of the dead store.
366  //
367  //            |--dead--|
368  //  |-- killing --|
369  //
370  // In this case we may want to move the destination address and trim the
371  // size of the dead store to avoid generating stores to addresses which will
372  // definitely be overwritten by the killing store.
373  if (!EnablePartialOverwriteTracking &&
374  (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
375  assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
376  "Expect to be handled as OW_Complete");
377  return OW_Begin;
378  }
379  // Otherwise, they don't completely overlap.
380  return OW_Unknown;
381 }
382 
383 /// Returns true if the memory which is accessed by the second instruction is not
384 /// modified between the first and the second instruction.
385 /// Precondition: Second instruction must be dominated by the first
386 /// instruction.
387 static bool
388 memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
389  BatchAAResults &AA, const DataLayout &DL,
390  DominatorTree *DT) {
391  // Do a backwards scan through the CFG from SecondI to FirstI. Look for
392  // instructions which can modify the memory location accessed by SecondI.
393  //
394  // While doing the walk keep track of the address to check. It might be
395  // different in different basic blocks due to PHI translation.
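 // For example (hypothetical IR): if SecondI stores through
 //   %p = phi i8* [ %a, %bb1 ], [ %b, %bb2 ]
 // then the address to check becomes %a when the scan enters %bb1 and %b when
 // it enters %bb2; if some block is reached with two different translated
 // addresses, the scan below conservatively gives up.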
396  using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
397  SmallVector<BlockAddressPair, 16> WorkList;
398  // Keep track of the address we visited each block with. Bail out if we
399  // visit a block with different addresses.
400  DenseMap<BasicBlock *, Value *> Visited;
401 
402  BasicBlock::iterator FirstBBI(FirstI);
403  ++FirstBBI;
404  BasicBlock::iterator SecondBBI(SecondI);
405  BasicBlock *FirstBB = FirstI->getParent();
406  BasicBlock *SecondBB = SecondI->getParent();
407  MemoryLocation MemLoc;
408  if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
409  MemLoc = MemoryLocation::getForDest(MemSet);
410  else
411  MemLoc = MemoryLocation::get(SecondI);
412 
413  auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
414 
415  // Start checking the SecondBB.
416  WorkList.push_back(
417  std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
418  bool isFirstBlock = true;
419 
420  // Check all blocks going backward until we reach the FirstBB.
421  while (!WorkList.empty()) {
422  BlockAddressPair Current = WorkList.pop_back_val();
423  BasicBlock *B = Current.first;
424  PHITransAddr &Addr = Current.second;
425  Value *Ptr = Addr.getAddr();
426 
427  // Ignore instructions before FirstI if this is the FirstBB.
428  BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
429 
430  BasicBlock::iterator EI;
431  if (isFirstBlock) {
432  // Ignore instructions after SecondI if this is the first visit of SecondBB.
433  assert(B == SecondBB && "first block is not the store block");
434  EI = SecondBBI;
435  isFirstBlock = false;
436  } else {
437  // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
438  // In this case we also have to look at instructions after SecondI.
439  EI = B->end();
440  }
441  for (; BI != EI; ++BI) {
442  Instruction *I = &*BI;
443  if (I->mayWriteToMemory() && I != SecondI)
444  if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
445  return false;
446  }
447  if (B != FirstBB) {
448  assert(B != &FirstBB->getParent()->getEntryBlock() &&
449  "Should not hit the entry block because SI must be dominated by LI");
450  for (BasicBlock *Pred : predecessors(B)) {
451  PHITransAddr PredAddr = Addr;
452  if (PredAddr.NeedsPHITranslationFromBlock(B)) {
453  if (!PredAddr.IsPotentiallyPHITranslatable())
454  return false;
455  if (PredAddr.PHITranslateValue(B, Pred, DT, false))
456  return false;
457  }
458  Value *TranslatedPtr = PredAddr.getAddr();
459  auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
460  if (!Inserted.second) {
461  // We already visited this block before. If it was with a different
462  // address - bail out!
463  if (TranslatedPtr != Inserted.first->second)
464  return false;
465  // ... otherwise just skip it.
466  continue;
467  }
468  WorkList.push_back(std::make_pair(Pred, PredAddr));
469  }
470  }
471  }
472  return true;
473 }
474 
475 static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
476  uint64_t &DeadSize, int64_t KillingStart,
477  uint64_t KillingSize, bool IsOverwriteEnd) {
478  auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
479  Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
480 
481  // We assume that memset/memcpy operates in chunks of the "largest" native
482  // type size, aligned on the same value. That means the optimal start and size
483  // of memset/memcpy should be a multiple of the preferred alignment of that
484  // type, so there is no sense in trying to reduce the store size any further,
485  // since any "extra" stores come for free anyway.
486  // On the other hand, the maximum alignment we can achieve is limited by the
487  // alignment of the initial store.
488 
489  // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
490  // "largest" native type.
491  // Note: What is the proper way to get that value?
492  // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
493  // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
494 
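 // For example (hypothetical sizes): a 32-byte memset with destination
 // alignment 8 whose last 12 bytes are overwritten by a later store is only
 // trimmed to 24 bytes (not 20), so the remaining length stays a multiple of
 // the preferred alignment and no extra misaligned tail store is introduced.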
495  int64_t ToRemoveStart = 0;
496  uint64_t ToRemoveSize = 0;
497  // Compute start and size of the region to remove. Make sure 'PrefAlign' is
498  // maintained on the remaining store.
499  if (IsOverwriteEnd) {
500  // Calculate required adjustment for 'KillingStart' in order to keep
501  // remaining store size aligned on 'PrefAlign'.
502  uint64_t Off =
503  offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
504  ToRemoveStart = KillingStart + Off;
505  if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
506  return false;
507  ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
508  } else {
509  ToRemoveStart = DeadStart;
510  assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
511  "Not overlapping accesses?");
512  ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
513  // Calculate required adjustment for 'ToRemoveSize' in order to keep
514  // start of the remaining store aligned on 'PrefAlign'.
515  uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
516  if (Off != 0) {
517  if (ToRemoveSize <= (PrefAlign.value() - Off))
518  return false;
519  ToRemoveSize -= PrefAlign.value() - Off;
520  }
521  assert(isAligned(PrefAlign, ToRemoveSize) &&
522  "Should preserve selected alignment");
523  }
524 
525  assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
526  assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
527 
528  uint64_t NewSize = DeadSize - ToRemoveSize;
529  if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
530  // When shortening an atomic memory intrinsic, the newly shortened
531  // length must remain an integer multiple of the element size.
532  const uint32_t ElementSize = AMI->getElementSizeInBytes();
533  if (0 != NewSize % ElementSize)
534  return false;
535  }
536 
537  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
538  << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
539  << "\n KILLER [" << ToRemoveStart << ", "
540  << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
541 
542  Value *DeadWriteLength = DeadIntrinsic->getLength();
543  Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
544  DeadIntrinsic->setLength(TrimmedLength);
545  DeadIntrinsic->setDestAlignment(PrefAlign);
546 
547  if (!IsOverwriteEnd) {
548  Value *OrigDest = DeadIntrinsic->getRawDest();
549  Type *Int8PtrTy =
550  Type::getInt8PtrTy(DeadIntrinsic->getContext(),
551  OrigDest->getType()->getPointerAddressSpace());
552  Value *Dest = OrigDest;
553  if (OrigDest->getType() != Int8PtrTy)
554  Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", DeadI);
555  Value *Indices[1] = {
556  ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
557  Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
558  Type::getInt8Ty(DeadIntrinsic->getContext()), Dest, Indices, "", DeadI);
559  NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
560  if (NewDestGEP->getType() != OrigDest->getType())
561  NewDestGEP = CastInst::CreatePointerCast(NewDestGEP, OrigDest->getType(),
562  "", DeadI);
563  DeadIntrinsic->setDest(NewDestGEP);
564  }
565 
566  // Finally update start and size of dead access.
567  if (!IsOverwriteEnd)
568  DeadStart += ToRemoveSize;
569  DeadSize = NewSize;
570 
571  return true;
572 }
573 
574 static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
575  int64_t &DeadStart, uint64_t &DeadSize) {
576  if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
577  return false;
578 
579  OverlapIntervalsTy::iterator OII = --IntervalMap.end();
580  int64_t KillingStart = OII->second;
581  uint64_t KillingSize = OII->first - KillingStart;
582 
583  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
584 
585  if (KillingStart > DeadStart &&
586  // Note: "KillingStart - DeadStart" is known to be positive due to
587  // preceding check.
588  (uint64_t)(KillingStart - DeadStart) < DeadSize &&
589  // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
590  // be non negative due to preceding checks.
591  KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
592  if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
593  true)) {
594  IntervalMap.erase(OII);
595  return true;
596  }
597  }
598  return false;
599 }
600 
601 static bool tryToShortenBegin(Instruction *DeadI,
602  OverlapIntervalsTy &IntervalMap,
603  int64_t &DeadStart, uint64_t &DeadSize) {
604  if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
605  return false;
606 
607  OverlapIntervalsTy::iterator OII = IntervalMap.begin();
608  int64_t KillingStart = OII->second;
609  uint64_t KillingSize = OII->first - KillingStart;
610 
611  assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
612 
613  if (KillingStart <= DeadStart &&
614  // Note: "DeadStart - KillingStart" is known to be non negative due to
615  // preceding check.
616  KillingSize > (uint64_t)(DeadStart - KillingStart)) {
617  // Note: "KillingSize - (uint64_t)(DeadStart - KillingStart)" is known to
618  // be positive due to preceding checks.
619  assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
620  "Should have been handled as OW_Complete");
621  if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
622  false)) {
623  IntervalMap.erase(OII);
624  return true;
625  }
626  }
627  return false;
628 }
629 
630 static Constant *
631 tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
632  int64_t KillingOffset, int64_t DeadOffset,
633  const DataLayout &DL, BatchAAResults &AA,
634  DominatorTree *DT) {
635 
636  if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
637  DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
638  KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
639  DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
640  memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
641  // If the store we find is:
642  // a) partially overwritten by the store to 'Loc'
643  // b) the killing store is fully contained in the dead one and
644  // c) they both have a constant value
645  // d) none of the two stores need padding
646  // Merge the two stores, replacing the dead store's value with a
647  // merge of both values.
648  // TODO: Deal with other constant types (vectors, etc), and probably
649  // some mem intrinsics (if needed)
650 
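 // For example (hypothetical, little-endian): a dead 'store i32 0, i32* %p'
 // followed by a killing 'store i8 1, i8* %p' (offset 0 within the i32) is
 // merged by zero-extending the i8 value, shifting it to bit 0 and OR'ing it
 // into the masked i32 value, yielding a single 'store i32 1, i32* %p'.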
651  APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
652  APInt KillingValue =
653  cast<ConstantInt>(KillingI->getValueOperand())->getValue();
654  unsigned KillingBits = KillingValue.getBitWidth();
655  assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
656  KillingValue = KillingValue.zext(DeadValue.getBitWidth());
657 
658  // Offset of the smaller store inside the larger store
659  unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
660  unsigned LShiftAmount =
661  DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
662  : BitOffsetDiff;
663  APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
664  LShiftAmount + KillingBits);
665  // Clear the bits we'll be replacing, then OR with the smaller
666  // store, shifted appropriately.
667  APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
668  LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
669  << "\n Killing: " << *KillingI
670  << "\n Merged Value: " << Merged << '\n');
671  return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
672  }
673  return nullptr;
674 }
675 
676 namespace {
677 // Returns true if \p I is an intrinsic that does not read or write memory.
678 bool isNoopIntrinsic(Instruction *I) {
679  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
680  switch (II->getIntrinsicID()) {
681  case Intrinsic::lifetime_start:
682  case Intrinsic::lifetime_end:
683  case Intrinsic::invariant_end:
684  case Intrinsic::launder_invariant_group:
685  case Intrinsic::assume:
686  return true;
687  case Intrinsic::dbg_addr:
688  case Intrinsic::dbg_declare:
689  case Intrinsic::dbg_label:
690  case Intrinsic::dbg_value:
691  llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
692  default:
693  return false;
694  }
695  }
696  return false;
697 }
698 
699 // Check if we can ignore \p D for DSE.
700 bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
701  Instruction *DI = D->getMemoryInst();
702  // Calls that only access inaccessible memory cannot read or write any memory
703  // locations we consider for elimination.
704  if (auto *CB = dyn_cast<CallBase>(DI))
705  if (CB->onlyAccessesInaccessibleMemory())
706  return true;
707 
708  // We can eliminate stores to locations not visible to the caller across
709  // throwing instructions.
710  if (DI->mayThrow() && !DefVisibleToCaller)
711  return true;
712 
713  // We can remove the dead stores, irrespective of the fence and its ordering
714  // (release/acquire/seq_cst). Fences only constrain the ordering of
715  // already visible stores; they do not make a store visible to other
716  // threads. So, skipping over a fence does not change a store from being
717  // dead.
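 // For example (hypothetical IR):
 //   store i32 0, i32* %p
 //   fence release
 //   store i32 1, i32* %p
 // The first store is still dead; per the reasoning above, the fence orders
 // stores that are already visible but does not itself publish the first one.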
718  if (isa<FenceInst>(DI))
719  return true;
720 
721  // Skip intrinsics that do not really read or modify memory.
722  if (isNoopIntrinsic(DI))
723  return true;
724 
725  return false;
726 }
727 
728 struct DSEState {
729  Function &F;
730  AliasAnalysis &AA;
731  EarliestEscapeInfo EI;
732 
733  /// The single BatchAA instance that is used to cache AA queries. It will
734  /// not be invalidated over the whole run. This is safe, because:
735  /// 1. Only memory writes are removed, so the alias cache for memory
736  /// locations remains valid.
737  /// 2. No new instructions are added (only instructions removed), so cached
738  /// information for a deleted value cannot be accessed by a re-used new
739  /// value pointer.
740  BatchAAResults BatchAA;
741 
742  MemorySSA &MSSA;
743  DominatorTree &DT;
744  PostDominatorTree &PDT;
745  const TargetLibraryInfo &TLI;
746  const DataLayout &DL;
747  const LoopInfo &LI;
748 
749  // Whether the function contains any irreducible control flow, useful for
750  // being accurately able to detect loops.
751  bool ContainsIrreducibleLoops;
752 
753  // All MemoryDefs that potentially could kill other MemDefs.
754  SmallVector<MemoryDef *, 64> MemDefs;
755  // Any that should be skipped as they are already deleted
756  SmallPtrSet<MemoryAccess *, 4> SkipStores;
757  // Keep track whether a given object is captured before return or not.
758  DenseMap<const Value *, bool> CapturedBeforeReturn;
759  // Keep track of all of the objects that are invisible to the caller after
760  // the function returns.
761  DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
762  // Keep track of blocks with throwing instructions not modeled in MemorySSA.
763  SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
764  // Post-order numbers for each basic block. Used to figure out if memory
765  // accesses are executed before another access.
766  DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
767  // Values that are only used with assumes. Used to refine pointer escape
768  // analysis.
769  SmallSetVector<const Value *, 16> EphValues;
770 
771  /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
772  /// basic block.
773  MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
774  // Check if there are root nodes that are terminated by UnreachableInst.
775  // Those roots pessimize post-dominance queries. If there are such roots,
776  // fall back to CFG scan starting from all non-unreachable roots.
777  bool AnyUnreachableExit;
778 
779  // Class contains self-reference, make sure it's not copied/moved.
780  DSEState(const DSEState &) = delete;
781  DSEState &operator=(const DSEState &) = delete;
782 
783  DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
784  PostDominatorTree &PDT, AssumptionCache &AC,
785  const TargetLibraryInfo &TLI, const LoopInfo &LI)
786  : F(F), AA(AA), EI(DT, LI, EphValues), BatchAA(AA, &EI), MSSA(MSSA),
787  DT(DT), PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
788  // Collect blocks with throwing instructions not modeled in MemorySSA and
789  // alloc-like objects.
790  unsigned PO = 0;
791  for (BasicBlock *BB : post_order(&F)) {
792  PostOrderNumbers[BB] = PO++;
793  for (Instruction &I : *BB) {
794  MemoryAccess *MA = MSSA.getMemoryAccess(&I);
795  if (I.mayThrow() && !MA)
796  ThrowingBlocks.insert(I.getParent());
797 
798  auto *MD = dyn_cast_or_null<MemoryDef>(MA);
799  if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
800  (getLocForWrite(&I) || isMemTerminatorInst(&I)))
801  MemDefs.push_back(MD);
802  }
803  }
804 
805  // Treat byval or inalloca arguments the same as Allocas; stores to them are
806  // dead at the end of the function.
807  for (Argument &AI : F.args())
808  if (AI.hasPassPointeeByValueCopyAttr())
809  InvisibleToCallerAfterRet.insert({&AI, true});
810 
811  // Collect whether there is any irreducible control flow in the function.
812  ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
813 
814  AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
815  return isa<UnreachableInst>(E->getTerminator());
816  });
817 
818  CodeMetrics::collectEphemeralValues(&F, &AC, EphValues);
819  }
820 
821  /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
822  /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
823  /// location (by \p DeadI instruction).
824  /// Return OW_MaybePartial if \p KillingI does not completely overwrite
825  /// \p DeadI, but they both write to the same underlying object. In that
826  /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
827  /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite the
828  /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
829  OverwriteResult isOverwrite(const Instruction *KillingI,
830  const Instruction *DeadI,
831  const MemoryLocation &KillingLoc,
832  const MemoryLocation &DeadLoc,
833  int64_t &KillingOff, int64_t &DeadOff) {
834  // AliasAnalysis does not always account for loops. Limit overwrite checks
835  // to dependencies for which we can guarantee they are independent of any
836  // loops they are in.
837  if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
838  return OW_Unknown;
839 
840  const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
841  const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
842  const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
843  const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
844 
845  // Check whether the killing store overwrites the whole object, in which
846  // case the size/offset of the dead store does not matter.
847  if (DeadUndObj == KillingUndObj && KillingLoc.Size.isPrecise()) {
848  uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
849  if (KillingUndObjSize != MemoryLocation::UnknownSize &&
850  KillingUndObjSize == KillingLoc.Size.getValue())
851  return OW_Complete;
852  }
853 
854  // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
855  // get imprecise values here, though (except for unknown sizes).
856  if (!KillingLoc.Size.isPrecise() || !DeadLoc.Size.isPrecise()) {
857  // In case no constant size is known, try to use the IR values for the
858  // number of bytes written and check if they match.
859  const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
860  const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
861  if (KillingMemI && DeadMemI) {
862  const Value *KillingV = KillingMemI->getLength();
863  const Value *DeadV = DeadMemI->getLength();
864  if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
865  return OW_Complete;
866  }
867 
868  // Masked stores have imprecise locations, but we can reason about them
869  // to some extent.
870  return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
871  }
872 
873  const uint64_t KillingSize = KillingLoc.Size.getValue();
874  const uint64_t DeadSize = DeadLoc.Size.getValue();
875 
876  // Query the alias information
877  AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
878 
879  // If the start pointers are the same, we just have to compare sizes to see if
880  // the killing store was larger than the dead store.
881  if (AAR == AliasResult::MustAlias) {
882  // Make sure that the KillingSize size is >= the DeadSize size.
883  if (KillingSize >= DeadSize)
884  return OW_Complete;
885  }
886 
887  // If we hit a partial alias we may have a full overwrite
888  if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
889  int32_t Off = AAR.getOffset();
890  if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
891  return OW_Complete;
892  }
893 
894  // If we can't resolve the same pointers to the same object, then we can't
895  // analyze them at all.
896  if (DeadUndObj != KillingUndObj) {
897  // Non aliasing stores to different objects don't overlap. Note that
898  // if the killing store is known to overwrite whole object (out of
899  // bounds access overwrites whole object as well) then it is assumed to
900  // completely overwrite any store to the same object even if they don't
901  // actually alias (see next check).
902  if (AAR == AliasResult::NoAlias)
903  return OW_None;
904  return OW_Unknown;
905  }
906 
907  // Okay, we have stores through two different pointers to the same object.
908  // Try to decompose each pointer into a "base + constant_offset" form. If the
909  // base pointers are equal, then we can reason about the two stores.
910  DeadOff = 0;
911  KillingOff = 0;
912  const Value *DeadBasePtr =
913  GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
914  const Value *KillingBasePtr =
915  GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
916 
917  // If the base pointers still differ, we have two completely different
918  // stores.
919  if (DeadBasePtr != KillingBasePtr)
920  return OW_Unknown;
921 
922  // The killing access completely overlaps the dead store if and only if
923  // both start and end of the dead one is "inside" the killing one:
924  // |<->|--dead--|<->|
925  // |-----killing------|
926  // Accesses may overlap if and only if start of one of them is "inside"
927  // another one:
928  // |<->|--dead--|<-------->|
929  // |-------killing--------|
930  // OR
931  // |-------dead-------|
932  // |<->|---killing---|<----->|
933  //
934  // We have to be careful here as *Off is signed while *.Size is unsigned.
935 
936  // Check if the dead access starts "not before" the killing one.
937  if (DeadOff >= KillingOff) {
938  // If the dead access ends "not after" the killing access then the
939  // dead one is completely overwritten by the killing one.
940  if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
941  return OW_Complete;
942  // If start of the dead access is "before" end of the killing access
943  // then accesses overlap.
944  else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
945  return OW_MaybePartial;
946  }
947  // If start of the killing access is "before" end of the dead access then
948  // accesses overlap.
949  else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
950  return OW_MaybePartial;
951  }
952 
953  // Can reach here only if accesses are known not to overlap.
954  return OW_None;
955  }
956 
957  bool isInvisibleToCallerAfterRet(const Value *V) {
958  if (isa<AllocaInst>(V))
959  return true;
960  auto I = InvisibleToCallerAfterRet.insert({V, false});
961  if (I.second) {
962  if (!isInvisibleToCallerOnUnwind(V)) {
963  I.first->second = false;
964  } else if (isNoAliasCall(V)) {
965  I.first->second = !PointerMayBeCaptured(V, true, false, EphValues);
966  }
967  }
968  return I.first->second;
969  }
970 
971  bool isInvisibleToCallerOnUnwind(const Value *V) {
972  bool RequiresNoCaptureBeforeUnwind;
973  if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
974  return false;
975  if (!RequiresNoCaptureBeforeUnwind)
976  return true;
977 
978  auto I = CapturedBeforeReturn.insert({V, true});
979  if (I.second)
980  // NOTE: This could be made more precise by PointerMayBeCapturedBefore
981  // with the killing MemoryDef. But we refrain from doing so for now to
982  // limit compile-time and this does not cause any changes to the number
983  // of stores removed on a large test set in practice.
984  I.first->second = PointerMayBeCaptured(V, false, true, EphValues);
985  return !I.first->second;
986  }
987 
988  Optional<MemoryLocation> getLocForWrite(Instruction *I) const {
989  if (!I->mayWriteToMemory())
990  return None;
991 
992  if (auto *CB = dyn_cast<CallBase>(I))
993  return MemoryLocation::getForDest(CB, TLI);
994 
995  return MemoryLocation::getOrNone(I);
996  }
997 
998  /// Assuming this instruction has a dead analyzable write, can we delete
999  /// this instruction?
1000  bool isRemovable(Instruction *I) {
1001  assert(getLocForWrite(I) && "Must have analyzable write");
1002 
1003  // Don't remove volatile/atomic stores.
1004  if (StoreInst *SI = dyn_cast<StoreInst>(I))
1005  return SI->isUnordered();
1006 
1007  if (auto *CB = dyn_cast<CallBase>(I)) {
1008  // Don't remove volatile memory intrinsics.
1009  if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1010  return !MI->isVolatile();
1011 
1012  // Never remove dead lifetime intrinsics, e.g. because they are followed
1013  // by a free.
1014  if (CB->isLifetimeStartOrEnd())
1015  return false;
1016 
1017  return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1018  !CB->isTerminator();
1019  }
1020 
1021  return false;
1022  }
1023 
1024  /// Returns true if \p UseInst completely overwrites \p DefLoc
1025  /// (stored by \p DefInst).
1026  bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1027  Instruction *UseInst) {
1028  // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1029  // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1030  // MemoryDef.
1031  if (!UseInst->mayWriteToMemory())
1032  return false;
1033 
1034  if (auto *CB = dyn_cast<CallBase>(UseInst))
1035  if (CB->onlyAccessesInaccessibleMemory())
1036  return false;
1037 
1038  int64_t InstWriteOffset, DepWriteOffset;
1039  if (auto CC = getLocForWrite(UseInst))
1040  return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1041  DepWriteOffset) == OW_Complete;
1042  return false;
1043  }
1044 
1045  /// Returns true if \p Def is not read before returning from the function.
1046  bool isWriteAtEndOfFunction(MemoryDef *Def) {
1047  LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1048  << *Def->getMemoryInst()
1049  << ") is at the end the function \n");
1050 
1051  auto MaybeLoc = getLocForWrite(Def->getMemoryInst());
1052  if (!MaybeLoc) {
1053  LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
1054  return false;
1055  }
1056 
1057  SmallVector<MemoryAccess *, 4> WorkList;
1058  SmallPtrSet<MemoryAccess *, 8> Visited;
1059  auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
1060  if (!Visited.insert(Acc).second)
1061  return;
1062  for (Use &U : Acc->uses())
1063  WorkList.push_back(cast<MemoryAccess>(U.getUser()));
1064  };
1065  PushMemUses(Def);
1066  for (unsigned I = 0; I < WorkList.size(); I++) {
1067  if (WorkList.size() >= MemorySSAScanLimit) {
1068  LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1069  return false;
1070  }
1071 
1072  MemoryAccess *UseAccess = WorkList[I];
1073  // Simply adding the users of MemoryPhi to the worklist is not enough,
1074  // because we might miss read clobbers in different iterations of a loop,
1075  // for example.
1076  // TODO: Add support for phi translation to handle the loop case.
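 // For example (hypothetical): a store to p[i] in a loop body may be read in
 // the next iteration through a load of p[i-1]; as SSA values the two
 // addresses look unrelated, so reasoning across the loop-header MemoryPhi
 // would require translating the queried location, which is not done here.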
1077  if (isa<MemoryPhi>(UseAccess))
1078  return false;
1079 
1080  // TODO: Checking for aliasing is expensive. Consider reducing the amount
1081  // of times this is called and/or caching it.
1082  Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1083  if (isReadClobber(*MaybeLoc, UseInst)) {
1084  LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1085  return false;
1086  }
1087 
1088  if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1089  PushMemUses(UseDef);
1090  }
1091  return true;
1092  }
1093 
1094  /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1095  /// pair with the MemoryLocation terminated by \p I and a boolean flag
1096  /// indicating whether \p I is a free-like call.
1097  Optional<std::pair<MemoryLocation, bool>>
1098  getLocForTerminator(Instruction *I) const {
1099  uint64_t Len;
1100  Value *Ptr;
1101  if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
1102  m_Value(Ptr))))
1103  return {std::make_pair(MemoryLocation(Ptr, Len), false)};
1104 
1105  if (auto *CB = dyn_cast<CallBase>(I)) {
1106  if (isFreeCall(I, &TLI))
1107  return {std::make_pair(MemoryLocation::getAfter(CB->getArgOperand(0)),
1108  true)};
1109  }
1110 
1111  return None;
1112  }
1113 
1114  /// Returns true if \p I is a memory terminator instruction like
1115  /// llvm.lifetime.end or free.
1116  bool isMemTerminatorInst(Instruction *I) const {
1117  IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
1118  return (II && II->getIntrinsicID() == Intrinsic::lifetime_end) ||
1119  isFreeCall(I, &TLI);
1120  }
1121 
1122  /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1123  /// instruction \p AccessI.
1124  bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1125  Instruction *MaybeTerm) {
1126  Optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1127  getLocForTerminator(MaybeTerm);
1128 
1129  if (!MaybeTermLoc)
1130  return false;
1131 
1132  // If the terminator is a free-like call, all accesses to the underlying
1133  // object can be considered terminated.
1134  if (getUnderlyingObject(Loc.Ptr) !=
1135  getUnderlyingObject(MaybeTermLoc->first.Ptr))
1136  return false;
1137 
1138  auto TermLoc = MaybeTermLoc->first;
1139  if (MaybeTermLoc->second) {
1140  const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1141  return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1142  }
1143  int64_t InstWriteOffset = 0;
1144  int64_t DepWriteOffset = 0;
1145  return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1146  DepWriteOffset) == OW_Complete;
1147  }
1148 
1149  // Returns true if \p Use may read from \p DefLoc.
1150  bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1151  if (isNoopIntrinsic(UseInst))
1152  return false;
1153 
1154  // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1155  // treated as read clobbers.
1156  if (auto SI = dyn_cast<StoreInst>(UseInst))
1157  return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1158 
1159  if (!UseInst->mayReadFromMemory())
1160  return false;
1161 
1162  if (auto *CB = dyn_cast<CallBase>(UseInst))
1163  if (CB->onlyAccessesInaccessibleMemory())
1164  return false;
1165 
1166  return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1167  }
1168 
1169  /// Returns true if a dependency between \p Current and \p KillingDef is
1170  /// guaranteed to be loop invariant for the loops that they are in. Either
1171  /// because they are known to be in the same block, in the same loop level or
1172  /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1173  /// during execution of the containing function.
1174  bool isGuaranteedLoopIndependent(const Instruction *Current,
1175  const Instruction *KillingDef,
1176  const MemoryLocation &CurrentLoc) {
1177  // If the dependency is within the same block or loop level (being careful
1178  // of irreducible loops), we know that AA will return a valid result for the
1179  // memory dependency. (Both at the function level, outside of any loop,
1180  // would also be valid but we currently disable that to limit compile time).
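 // For example (hypothetical): a store through an induction-variable-derived
 // pointer inside a loop writes a different address on every iteration, so a
 // single overwrite after the loop does not make all of those stores dead;
 // hence the location must be provably the same on every execution.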
1181  if (Current->getParent() == KillingDef->getParent())
1182  return true;
1183  const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
1184  if (!ContainsIrreducibleLoops && CurrentLI &&
1185  CurrentLI == LI.getLoopFor(KillingDef->getParent()))
1186  return true;
1187  // Otherwise check the memory location is invariant to any loops.
1188  return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1189  }
1190 
1191  /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1192  /// loop. In particular, this guarantees that it only references a single
1193  /// MemoryLocation during execution of the containing function.
1194  bool isGuaranteedLoopInvariant(const Value *Ptr) {
1195  Ptr = Ptr->stripPointerCasts();
1196  if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1197  if (GEP->hasAllConstantIndices())
1198  Ptr = GEP->getPointerOperand()->stripPointerCasts();
1199 
1200  if (auto *I = dyn_cast<Instruction>(Ptr))
1201  return I->getParent()->isEntryBlock();
1202  return true;
1203  }
1204 
1205  // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1206  // with no read access between them or on any other path to a function exit
1207  // block if \p KillingLoc is not accessible after the function returns. If
1208  // there is no such MemoryDef, return None. The returned value may not
1209  // (completely) overwrite \p KillingLoc. Currently we bail out when we
1210  // encounter an aliasing MemoryUse (read).
1211  Optional<MemoryAccess *>
1212  getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1213  const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1214  unsigned &ScanLimit, unsigned &WalkerStepLimit,
1215  bool IsMemTerm, unsigned &PartialLimit) {
1216  if (ScanLimit == 0 || WalkerStepLimit == 0) {
1217  LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1218  return None;
1219  }
1220 
1221  MemoryAccess *Current = StartAccess;
1222  Instruction *KillingI = KillingDef->getMemoryInst();
1223  LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1224 
1225  // Only optimize defining access of KillingDef when directly starting at its
1226  // defining access. The defining access also must only access KillingLoc. At
1227  // the moment we only support instructions with a single write location, so
1228  // it should be sufficient to disable optimizations for instructions that
1229  // also read from memory.
1230  bool CanOptimize = OptimizeMemorySSA &&
1231  KillingDef->getDefiningAccess() == StartAccess &&
1232  !KillingI->mayReadFromMemory();
1233 
1234  // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1235  Optional<MemoryLocation> CurrentLoc;
1236  for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1237  LLVM_DEBUG({
1238  dbgs() << " visiting " << *Current;
1239  if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1240  dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1241  << ")";
1242  dbgs() << "\n";
1243  });
1244 
1245  // Reached TOP.
1246  if (MSSA.isLiveOnEntryDef(Current)) {
1247  LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1248  if (CanOptimize && Current != KillingDef->getDefiningAccess())
1249  // The first clobbering def is... none.
1250  KillingDef->setOptimized(Current);
1251  return None;
1252  }
1253 
1254  // Cost of a step. Accesses in the same block are more likely to be valid
1255  // candidates for elimination, hence consider them cheaper.
1256  unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1257  ? MemorySSASameBBStepCost
1258  : MemorySSAOtherBBStepCost;
1259  if (WalkerStepLimit <= StepCost) {
1260  LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1261  return None;
1262  }
1263  WalkerStepLimit -= StepCost;
1264 
1265  // Return for MemoryPhis. They cannot be eliminated directly and the
1266  // caller is responsible for traversing them.
1267  if (isa<MemoryPhi>(Current)) {
1268  LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1269  return Current;
1270  }
1271 
1272  // Below, check if CurrentDef is a valid candidate to be eliminated by
1273  // KillingDef. If it is not, check the next candidate.
1274  MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1275  Instruction *CurrentI = CurrentDef->getMemoryInst();
1276 
1277  if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
1278  CanOptimize = false;
1279  continue;
1280  }
1281 
1282  // Before we try to remove anything, check for any extra throwing
1283  // instructions that block us from DSEing
1284  if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1285  LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1286  return None;
1287  }
1288 
1289  // Check for anything that looks like it will be a barrier to further
1290  // removal
1291  if (isDSEBarrier(KillingUndObj, CurrentI)) {
1292  LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1293  return None;
1294  }
1295 
1296  // If Current is known to be on a path that reads DefLoc or is a read
1297  // clobber, bail out, as the path is not profitable. We skip this check
1298  // for intrinsic calls, because the code knows how to handle memcpy
1299  // intrinsics.
1300  if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1301  return None;
1302 
1303  // Quick check if there are direct uses that are read-clobbers.
1304  if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1305  if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1306  return !MSSA.dominates(StartAccess, UseOrDef) &&
1307  isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1308  return false;
1309  })) {
1310  LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1311  return None;
1312  }
1313 
1314  // If Current does not have an analyzable write location or is not
1315  // removable, skip it.
1316  CurrentLoc = getLocForWrite(CurrentI);
1317  if (!CurrentLoc || !isRemovable(CurrentI)) {
1318  CanOptimize = false;
1319  continue;
1320  }
1321 
1322  // AliasAnalysis does not account for loops. Limit elimination to
1323  // candidates for which we can guarantee they always store to the same
1324  // memory location and not located in different loops.
1325  if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1326  LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1327  CanOptimize = false;
1328  continue;
1329  }
1330 
1331  if (IsMemTerm) {
1332  // If the killing def is a memory terminator (e.g. lifetime.end), check
1333  // the next candidate if the current Current does not write the same
1334  // underlying object as the terminator.
1335  if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1336  CanOptimize = false;
1337  continue;
1338  }
1339  } else {
1340  int64_t KillingOffset = 0;
1341  int64_t DeadOffset = 0;
1342  auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1343  KillingOffset, DeadOffset);
1344  if (CanOptimize) {
1345  // CurrentDef is the earliest write clobber of KillingDef. Use it as
1346  // optimized access. Do not optimize if CurrentDef is already the
1347  // defining access of KillingDef.
1348  if (CurrentDef != KillingDef->getDefiningAccess() &&
1349  (OR == OW_Complete || OR == OW_MaybePartial))
1350  KillingDef->setOptimized(CurrentDef);
1351 
1352  // Once a may-aliasing def is encountered do not set an optimized
1353  // access.
1354  if (OR != OW_None)
1355  CanOptimize = false;
1356  }
1357 
1358  // If Current does not write to the same object as KillingDef, check
1359  // the next candidate.
1360  if (OR == OW_Unknown || OR == OW_None)
1361  continue;
1362  else if (OR == OW_MaybePartial) {
1363  // If KillingDef only partially overwrites Current, check the next
1364  // candidate if the partial step limit is exceeded. This aggressively
1365  // limits the number of candidates for partial store elimination,
1366  // which are less likely to be removable in the end.
1367  if (PartialLimit <= 1) {
1368  WalkerStepLimit -= 1;
1369  LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
1370  continue;
1371  }
1372  PartialLimit -= 1;
1373  }
1374  }
1375  break;
1376  };
1377 
1378  // Accesses to objects accessible after the function returns can only be
1379  // eliminated if the access is dead along all paths to the exit. Collect
1380  // the blocks with killing (=completely overwriting MemoryDefs) and check if
1381  // they cover all paths from MaybeDeadAccess to any function exit.
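 // For example (hypothetical): a store to a global followed by a complete
 // overwrite only on the 'then' side of a branch is still live on the 'else'
 // path to the return, so it must not be removed; the killing-block walk
 // below rules out exactly these cases.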
1382  SmallPtrSet<Instruction *, 16> KillingDefs;
1383  KillingDefs.insert(KillingDef->getMemoryInst());
1384  MemoryAccess *MaybeDeadAccess = Current;
1385  MemoryLocation MaybeDeadLoc = *CurrentLoc;
1386  Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1387  LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1388  << *MaybeDeadI << ")\n");
1389 
1390  SmallSetVector<MemoryAccess *, 32> WorkList;
1391  auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
1392  for (Use &U : Acc->uses())
1393  WorkList.insert(cast<MemoryAccess>(U.getUser()));
1394  };
1395  PushMemUses(MaybeDeadAccess);
1396 
1397  // Check if DeadDef may be read.
1398  for (unsigned I = 0; I < WorkList.size(); I++) {
1399  MemoryAccess *UseAccess = WorkList[I];
1400 
1401  LLVM_DEBUG(dbgs() << " " << *UseAccess);
1402  // Bail out if the number of accesses to check exceeds the scan limit.
1403  if (ScanLimit < (WorkList.size() - I)) {
1404  LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1405  return None;
1406  }
1407  --ScanLimit;
1408  NumDomMemDefChecks++;
1409 
1410  if (isa<MemoryPhi>(UseAccess)) {
1411  if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1412  return DT.properlyDominates(KI->getParent(),
1413  UseAccess->getBlock());
1414  })) {
1415  LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1416  continue;
1417  }
1418  LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1419  PushMemUses(UseAccess);
1420  continue;
1421  }
1422 
1423  Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1424  LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1425 
1426  if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1427  return DT.dominates(KI, UseInst);
1428  })) {
1429  LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1430  continue;
1431  }
1432 
1433  // A memory terminator kills all preceding MemoryDefs and all succeeding
1434  // MemoryAccesses. We do not have to check its users.
1435  if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1436  LLVM_DEBUG(
1437  dbgs()
1438  << " ... skipping, memterminator invalidates following accesses\n");
1439  continue;
1440  }
1441 
1442  if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1443  LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1444  PushMemUses(UseAccess);
1445  continue;
1446  }
1447 
1448  if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
1449  LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1450  return None;
1451  }
1452 
1453  // Uses which may read the original MemoryDef mean we cannot eliminate the
1454  // original MD. Stop walk.
1455  if (isReadClobber(MaybeDeadLoc, UseInst)) {
1456  LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1457  return None;
1458  }
1459 
1460  // If this worklist walks back to the original memory access (and the
1461  // pointer is not guaranteed loop invariant) then we cannot assume that a
1462  // store kills itself.
1463  if (MaybeDeadAccess == UseAccess &&
1464  !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1465  LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1466  return None;
1467  }
1468  // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1469  // if it reads the memory location.
1470  // TODO: It would probably be better to check for self-reads before
1471  // calling the function.
1472  if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1473  LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1474  continue;
1475  }
1476 
1477  // Check all uses for MemoryDefs, except for defs completely overwriting
1478  // the original location. Otherwise we have to check uses of *all*
1479  // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
1480  // miss cases like the following
1481  // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1482  // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1483  // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1484  // (The Use points to the *first* Def it may alias)
1485  // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1486  // stores [0,1]
1487  if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1488  if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1489  BasicBlock *MaybeKillingBlock = UseInst->getParent();
1490  if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1491  PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1492  if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1493  LLVM_DEBUG(dbgs()
1494  << " ... found killing def " << *UseInst << "\n");
1495  KillingDefs.insert(UseInst);
1496  }
1497  } else {
1498  LLVM_DEBUG(dbgs()
1499  << " ... found preceding def " << *UseInst << "\n");
1500  return None;
1501  }
1502  } else
1503  PushMemUses(UseDef);
1504  }
1505  }
1506 
1507  // For accesses to locations visible after the function returns, make sure
1508  // that the location is dead (=overwritten) along all paths from
1509  // MaybeDeadAccess to the exit.
1510  if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1511  SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1512  for (Instruction *KD : KillingDefs)
1513  KillingBlocks.insert(KD->getParent());
1514  assert(!KillingBlocks.empty() &&
1515  "Expected at least a single killing block");
1516 
1517  // Find the common post-dominator of all killing blocks.
1518  BasicBlock *CommonPred = *KillingBlocks.begin();
1519  for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1520  if (!CommonPred)
1521  break;
1522  CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1523  }
1524 
1525  // If the common post-dominator does not post-dominate MaybeDeadAccess,
1526  // there is a path from MaybeDeadAccess to an exit not going through a
1527  // killing block.
1528  if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1529  if (!AnyUnreachableExit)
1530  return None;
1531 
1532  // Fall back to CFG scan starting at all non-unreachable roots if not
1533  // all paths to the exit go through CommonPred.
1534  CommonPred = nullptr;
1535  }
1536 
1537  // If CommonPred itself is in the set of killing blocks, we're done.
1538  if (KillingBlocks.count(CommonPred))
1539  return {MaybeDeadAccess};
1540 
1541  SetVector<BasicBlock *> WorkList;
1542  // If CommonPred is null, there are multiple exits from the function.
1543  // They all have to be added to the worklist.
1544  if (CommonPred)
1545  WorkList.insert(CommonPred);
1546  else
1547  for (BasicBlock *R : PDT.roots()) {
1548  if (!isa<UnreachableInst>(R->getTerminator()))
1549  WorkList.insert(R);
1550  }
1551 
1552  NumCFGTries++;
1553  // Check if all paths starting from an exit node go through one of the
1554  // killing blocks before reaching MaybeDeadAccess.
1555  for (unsigned I = 0; I < WorkList.size(); I++) {
1556  NumCFGChecks++;
1557  BasicBlock *Current = WorkList[I];
1558  if (KillingBlocks.count(Current))
1559  continue;
1560  if (Current == MaybeDeadAccess->getBlock())
1561  return None;
1562 
1563  // MaybeDeadAccess is reachable from the entry, so we don't have to
1564  // explore unreachable blocks further.
1565  if (!DT.isReachableFromEntry(Current))
1566  continue;
1567 
1568  for (BasicBlock *Pred : predecessors(Current))
1569  WorkList.insert(Pred);
1570 
1571  if (WorkList.size() >= MemorySSAPathCheckLimit)
1572  return None;
1573  }
1574  NumCFGSuccess++;
1575  }
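  // Illustration (hypothetical IR; %c, @g and the block names are made up):
  // with a store to a visible global in %entry and a killing store only in
  // %then,
  //   entry: store i32 0, i32* @g
  //          br i1 %c, label %then, label %exit
  //   then:  store i32 1, i32* @g
  //          br label %exit
  //   exit:  ret void
  // the killing block %then does not lie on the entry -> exit path taken when
  // %c is false, so the checks above fail and the store in %entry is kept.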
1576 
1577  // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1578  // potentially dead.
1579  return {MaybeDeadAccess};
1580  }
1581 
1582  // Delete dead memory defs
1583  void deleteDeadInstruction(Instruction *SI) {
1584  MemorySSAUpdater Updater(&MSSA);
1585  SmallVector<Instruction *, 32> NowDeadInsts;
1586  NowDeadInsts.push_back(SI);
1587  --NumFastOther;
1588 
1589  while (!NowDeadInsts.empty()) {
1590  Instruction *DeadInst = NowDeadInsts.pop_back_val();
1591  ++NumFastOther;
1592 
1593  // Try to preserve debug information attached to the dead instruction.
1594  salvageDebugInfo(*DeadInst);
1595  salvageKnowledge(DeadInst);
1596 
1597  // Remove the Instruction from MSSA.
1598  if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
1599  if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
1600  SkipStores.insert(MD);
1601  }
1602 
1603  Updater.removeMemoryAccess(MA);
1604  }
1605 
1606  auto I = IOLs.find(DeadInst->getParent());
1607  if (I != IOLs.end())
1608  I->second.erase(DeadInst);
1609  // Remove its operands
1610  for (Use &O : DeadInst->operands())
1611  if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1612  O = nullptr;
1613  if (isInstructionTriviallyDead(OpI, &TLI))
1614  NowDeadInsts.push_back(OpI);
1615  }
1616 
1617  EI.removeInstruction(DeadInst);
1618  DeadInst->eraseFromParent();
1619  }
1620  }
1621 
1622  // Check for any extra throws between \p KillingI and \p DeadI that block
1623  // DSE. This only checks extra maythrows (those that aren't MemoryDefs).
1624  // MemoryDefs that may throw are handled during the walk from one def to the
1625  // next.
1626  bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1627  const Value *KillingUndObj) {
1628  // First see if we can ignore it by using the fact that the object KillingI
1629  // accesses is an alloca/alloca-like object that is not visible to the caller
1630  // during execution of the function.
1631  if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
1632  return false;
1633 
1634  if (KillingI->getParent() == DeadI->getParent())
1635  return ThrowingBlocks.count(KillingI->getParent());
1636  return !ThrowingBlocks.empty();
1637  }
1638 
1639  // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1640  // instructions act as barriers:
1641  // * A memory instruction that may throw and \p KillingI accesses a non-stack
1642  // object.
1643  // * Atomic stores stronger than monotonic.
1644  bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
1645  // If DeadI may throw it acts as a barrier, unless the accessed object is an
1646  // alloca/alloca-like object that does not escape.
1647  if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
1648  return true;
1649 
1650  // If DeadI is an atomic load/store stronger than monotonic, do not try to
1651  // eliminate/reorder it.
1652  if (DeadI->isAtomic()) {
1653  if (auto *LI = dyn_cast<LoadInst>(DeadI))
1654  return isStrongerThanMonotonic(LI->getOrdering());
1655  if (auto *SI = dyn_cast<StoreInst>(DeadI))
1656  return isStrongerThanMonotonic(SI->getOrdering());
1657  if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
1658  return isStrongerThanMonotonic(ARMW->getOrdering());
1659  if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
1660  return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
1661  isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
1662  llvm_unreachable("other instructions should be skipped in MemorySSA");
1663  }
1664  return false;
1665  }
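  // Illustration (hypothetical IR; @g is made up): an ordered atomic store
  // such as
  //   store atomic i32 1, i32* @g seq_cst, align 4
  // is stronger than monotonic and therefore treated as a barrier: earlier
  // stores to @g are not eliminated across it.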
1666 
1667  /// Eliminate writes to objects that are not visible in the caller and are not
1668  /// accessed before returning from the function.
1669  bool eliminateDeadWritesAtEndOfFunction() {
1670  bool MadeChange = false;
1671  LLVM_DEBUG(
1672  dbgs()
1673  << "Trying to eliminate MemoryDefs at the end of the function\n");
1674  for (MemoryDef *Def : llvm::reverse(MemDefs)) {
1675  if (SkipStores.contains(Def))
1676  continue;
1677 
1678  Instruction *DefI = Def->getMemoryInst();
1679  auto DefLoc = getLocForWrite(DefI);
1680  if (!DefLoc || !isRemovable(DefI))
1681  continue;
1682 
1683  // NOTE: Currently eliminating writes at the end of a function is limited
1684  // to MemoryDefs with a single underlying object, to save compile-time. In
1685  // practice it appears the case with multiple underlying objects is very
1686  // uncommon. If it turns out to be important, we can use
1687  // getUnderlyingObjects here instead.
1688  const Value *UO = getUnderlyingObject(DefLoc->Ptr);
1689  if (!isInvisibleToCallerAfterRet(UO))
1690  continue;
1691 
1692  if (isWriteAtEndOfFunction(Def)) {
1693  // See through pointer-to-pointer bitcasts
1694  LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
1695  "of the function\n");
1696  deleteDeadInstruction(DefI);
1697  ++NumFastStores;
1698  MadeChange = true;
1699  }
1700  }
1701  return MadeChange;
1702  }
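  // Illustration (hypothetical IR; %a is made up): a store to a non-escaping
  // alloca that is never read again before returning is such a dead write:
  //   %a = alloca i32
  //   store i32 42, i32* %a   ; no later read of %a before ret -> removable
  //   ret void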
1703 
1704  /// If we have a zero initializing memset following a call to malloc,
1705  /// try folding it into a call to calloc.
1706  bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
1707  Instruction *DefI = Def->getMemoryInst();
1708  MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1709  if (!MemSet)
1710  // TODO: Could handle zero store to small allocation as well.
1711  return false;
1712  Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1713  if (!StoredConstant || !StoredConstant->isNullValue())
1714  return false;
1715 
1716  if (!isRemovable(DefI))
1717  // The memset might be volatile.
1718  return false;
1719 
1720  if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
1721  F.hasFnAttribute(Attribute::SanitizeAddress) ||
1722  F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
1723  F.getName() == "calloc")
1724  return false;
1725  auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
1726  if (!Malloc)
1727  return false;
1728  auto *InnerCallee = Malloc->getCalledFunction();
1729  if (!InnerCallee)
1730  return false;
1731  LibFunc Func;
1732  if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
1733  Func != LibFunc_malloc)
1734  return false;
1735 
1736  auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
1737  // Check for a br(icmp(ptr, null), truebb, falsebb) pattern at the end
1738  // of the malloc block.
1739  auto *MallocBB = Malloc->getParent(),
1740  *MemsetBB = Memset->getParent();
1741  if (MallocBB == MemsetBB)
1742  return true;
1743  auto *Ptr = Memset->getArgOperand(0);
1744  auto *TI = MallocBB->getTerminator();
1745  ICmpInst::Predicate Pred;
1746  BasicBlock *TrueBB, *FalseBB;
1747  if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
1748  FalseBB)))
1749  return false;
1750  if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB)
1751  return false;
1752  return true;
1753  };
1754 
1755  if (Malloc->getOperand(0) != MemSet->getLength())
1756  return false;
1757  if (!shouldCreateCalloc(Malloc, MemSet) ||
1758  !DT.dominates(Malloc, MemSet) ||
1759  !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
1760  return false;
1761  IRBuilder<> IRB(Malloc);
1762  const auto &DL = Malloc->getModule()->getDataLayout();
1763  auto *Calloc =
1764  emitCalloc(ConstantInt::get(IRB.getIntPtrTy(DL), 1),
1765  Malloc->getArgOperand(0), IRB, TLI);
1766  if (!Calloc)
1767  return false;
1768  MemorySSAUpdater Updater(&MSSA);
1769  auto *LastDef =
1770  cast<MemoryDef>(Updater.getMemorySSA()->getMemoryAccess(Malloc));
1771  auto *NewAccess =
1772  Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), LastDef,
1773  LastDef);
1774  auto *NewAccessMD = cast<MemoryDef>(NewAccess);
1775  Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
1776  Updater.removeMemoryAccess(Malloc);
1777  Malloc->replaceAllUsesWith(Calloc);
1778  Malloc->eraseFromParent();
1779  return true;
1780  }
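  // Illustration (hypothetical IR; %p and %n are made up): when the conditions
  // above hold, a pattern like
  //   %p = call i8* @malloc(i64 %n)
  //   call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 %n, i1 false)
  // is rewritten so that the allocation becomes
  //   %p = call i8* @calloc(i64 1, i64 %n)
  // and the now-redundant memset is deleted by the caller of this function.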
1781 
1782  /// \returns true if \p Def is a no-op store, either because it
1783  /// directly stores back a loaded value or stores zero to a calloced object.
1784  bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
1785  Instruction *DefI = Def->getMemoryInst();
1786  StoreInst *Store = dyn_cast<StoreInst>(DefI);
1787  MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1788  Constant *StoredConstant = nullptr;
1789  if (Store)
1790  StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
1791  else if (MemSet)
1792  StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1793  else
1794  return false;
1795 
1796  if (!isRemovable(DefI))
1797  return false;
1798 
1799  if (StoredConstant) {
1800  Constant *InitC =
1801  getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
1802  // If the clobbering access is LiveOnEntry, no instructions between them
1803  // can modify the memory location.
1804  if (InitC && InitC == StoredConstant)
1805  return MSSA.isLiveOnEntryDef(
1806  MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def));
1807  }
1808 
1809  if (!Store)
1810  return false;
1811 
1812  if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
1813  if (LoadI->getPointerOperand() == Store->getOperand(1)) {
1814  // Get the defining access for the load.
1815  auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
1816  // Fast path: the defining accesses are the same.
1817  if (LoadAccess == Def->getDefiningAccess())
1818  return true;
1819 
1820  // Look through phi accesses. Recursively scan all phi accesses by
1821  // adding them to a worklist. Bail when we run into a memory def that
1822  // does not match LoadAccess.
1823  SetVector<MemoryAccess *> ToCheck;
1824  MemoryAccess *Current =
1825  MSSA.getWalker()->getClobberingMemoryAccess(Def);
1826  // We don't want to bail when we run into the store memory def. But,
1827  // the phi access may point to it. So, pretend like we've already
1828  // checked it.
1829  ToCheck.insert(Def);
1830  ToCheck.insert(Current);
1831  // Start at current (1) to simulate already having checked Def.
1832  for (unsigned I = 1; I < ToCheck.size(); ++I) {
1833  Current = ToCheck[I];
1834  if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
1835  // Check all the operands.
1836  for (auto &Use : PhiAccess->incoming_values())
1837  ToCheck.insert(cast<MemoryAccess>(&Use));
1838  continue;
1839  }
1840 
1841  // If we found a memory def, bail. This happens when we have an
1842  // unrelated write in between an otherwise noop store.
1843  assert(isa<MemoryDef>(Current) &&
1844  "Only MemoryDefs should reach here.");
1845  // TODO: Skip no alias MemoryDefs that have no aliasing reads.
1846  // We are searching for the definition of the store's destination.
1847  // So, if that is the same definition as the load, then this is a
1848  // noop. Otherwise, fail.
1849  if (LoadAccess != Current)
1850  return false;
1851  }
1852  return true;
1853  }
1854  }
1855 
1856  return false;
1857  }
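  // Illustration (hypothetical IR; %p and %v are made up): a store that writes
  // back a value just loaded from the same pointer is such a no-op when no
  // other write clobbers the location in between:
  //   %v = load i32, i32* %p
  //   store i32 %v, i32* %p   ; removable
  // Similarly, storing zero into memory known to be zero-initialized by its
  // allocation (e.g. calloc) is a no-op.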
1858 
1859  bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
1860  bool Changed = false;
1861  for (auto OI : IOL) {
1862  Instruction *DeadI = OI.first;
1863  MemoryLocation Loc = *getLocForWrite(DeadI);
1864  assert(isRemovable(DeadI) && "Expect only removable instruction");
1865 
1866  const Value *Ptr = Loc.Ptr->stripPointerCasts();
1867  int64_t DeadStart = 0;
1868  uint64_t DeadSize = Loc.Size.getValue();
1869  GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
1870  OverlapIntervalsTy &IntervalMap = OI.second;
1871  Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
1872  if (IntervalMap.empty())
1873  continue;
1874  Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
1875  }
1876  return Changed;
1877  }
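  // Illustration: if a dead memset covers bytes [0, 8) of an object and a
  // later store overwrites only its tail [4, 8), tryToShortenEnd shrinks the
  // memset to bytes [0, 4) instead of removing it; tryToShortenBegin handles
  // the symmetric case where the overwritten range is at the front.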
1878 
1879  /// Eliminates writes to locations where the value that is being written
1880  /// is already stored at the same location.
1881  bool eliminateRedundantStoresOfExistingValues() {
1882  bool MadeChange = false;
1883  LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
1884  "already existing value\n");
1885  for (auto *Def : MemDefs) {
1886  if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
1887  continue;
1888 
1889  Instruction *DefInst = Def->getMemoryInst();
1890  auto MaybeDefLoc = getLocForWrite(DefInst);
1891  if (!MaybeDefLoc || !isRemovable(DefInst))
1892  continue;
1893 
1894  MemoryDef *UpperDef;
1895  // To conserve compile-time, we avoid walking to the next clobbering def.
1896  // Instead, we just try to get the optimized access, if it exists. DSE
1897  // will try to optimize defs during the earlier traversal.
1898  if (Def->isOptimized())
1899  UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
1900  else
1901  UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
1902  if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
1903  continue;
1904 
1905  Instruction *UpperInst = UpperDef->getMemoryInst();
1906  auto IsRedundantStore = [&]() {
1907  if (DefInst->isIdenticalTo(UpperInst))
1908  return true;
1909  if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
1910  if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
1911  // MemSetInst must have a write location.
1912  MemoryLocation UpperLoc = *getLocForWrite(UpperInst);
1913  int64_t InstWriteOffset = 0;
1914  int64_t DepWriteOffset = 0;
1915  auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
1916  InstWriteOffset, DepWriteOffset);
1917  Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
1918  return StoredByte && StoredByte == MemSetI->getOperand(1) &&
1919  OR == OW_Complete;
1920  }
1921  }
1922  return false;
1923  };
1924 
1925  if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
1926  continue;
1927  LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
1928  << '\n');
1929  deleteDeadInstruction(DefInst);
1930  NumRedundantStores++;
1931  MadeChange = true;
1932  }
1933  return MadeChange;
1934  }
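  // Illustration (hypothetical IR; %p is made up): the later store below
  // writes a byte the earlier memset has already stored, so it is redundant
  // and can be removed once its defining (or optimized) access is that memset:
  //   call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 16, i1 false)
  //   store i8 0, i8* %p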
1935 };
1936 
1937 static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
1938  DominatorTree &DT, PostDominatorTree &PDT,
1939  AssumptionCache &AC,
1940  const TargetLibraryInfo &TLI,
1941  const LoopInfo &LI) {
1942  bool MadeChange = false;
1943 
1944  MSSA.ensureOptimizedUses();
1945  DSEState State(F, AA, MSSA, DT, PDT, AC, TLI, LI);
1946  // For each store:
1947  for (unsigned I = 0; I < State.MemDefs.size(); I++) {
1948  MemoryDef *KillingDef = State.MemDefs[I];
1949  if (State.SkipStores.count(KillingDef))
1950  continue;
1951  Instruction *KillingI = KillingDef->getMemoryInst();
1952 
1953  Optional<MemoryLocation> MaybeKillingLoc;
1954  if (State.isMemTerminatorInst(KillingI))
1955  MaybeKillingLoc = State.getLocForTerminator(KillingI).map(
1956  [](const std::pair<MemoryLocation, bool> &P) { return P.first; });
1957  else
1958  MaybeKillingLoc = State.getLocForWrite(KillingI);
1959 
1960  if (!MaybeKillingLoc) {
1961  LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
1962  << *KillingI << "\n");
1963  continue;
1964  }
1965  MemoryLocation KillingLoc = *MaybeKillingLoc;
1966  assert(KillingLoc.Ptr && "KillingLoc should not be null");
1967  const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
1968  LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
1969  << *KillingDef << " (" << *KillingI << ")\n");
1970 
1971  unsigned ScanLimit = MemorySSAScanLimit;
1972  unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
1973  unsigned PartialLimit = MemorySSAPartialStoreLimit;
1974  // Worklist of MemoryAccesses that may be killed by KillingDef.
1975  SetVector<MemoryAccess *> ToCheck;
1976  ToCheck.insert(KillingDef->getDefiningAccess());
1977 
1978  bool Shortend = false;
1979  bool IsMemTerm = State.isMemTerminatorInst(KillingI);
1980  // Check if MemoryAccesses in the worklist are killed by KillingDef.
1981  for (unsigned I = 0; I < ToCheck.size(); I++) {
1982  MemoryAccess *Current = ToCheck[I];
1983  if (State.SkipStores.count(Current))
1984  continue;
1985 
1986  Optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
1987  KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
1988  WalkerStepLimit, IsMemTerm, PartialLimit);
1989 
1990  if (!MaybeDeadAccess) {
1991  LLVM_DEBUG(dbgs() << " finished walk\n");
1992  continue;
1993  }
1994 
1995  MemoryAccess *DeadAccess = *MaybeDeadAccess;
1996  LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
1997  if (isa<MemoryPhi>(DeadAccess)) {
1998  LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
1999  for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
2000  MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
2001  BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2002  BasicBlock *PhiBlock = DeadAccess->getBlock();
2003 
2004  // We only consider incoming MemoryAccesses that come before the
2005  // MemoryPhi. Otherwise we could discover candidates that do not
2006  // strictly dominate our starting def.
2007  if (State.PostOrderNumbers[IncomingBlock] >
2008  State.PostOrderNumbers[PhiBlock])
2009  ToCheck.insert(IncomingAccess);
2010  }
2011  continue;
2012  }
2013  auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
2014  Instruction *DeadI = DeadDefAccess->getMemoryInst();
2015  LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
2016  ToCheck.insert(DeadDefAccess->getDefiningAccess());
2017  NumGetDomMemoryDefPassed++;
2018 
2019  if (!DebugCounter::shouldExecute(MemorySSACounter))
2020  continue;
2021 
2022  MemoryLocation DeadLoc = *State.getLocForWrite(DeadI);
2023 
2024  if (IsMemTerm) {
2025  const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
2026  if (KillingUndObj != DeadUndObj)
2027  continue;
2028  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2029  << "\n KILLER: " << *KillingI << '\n');
2030  State.deleteDeadInstruction(DeadI);
2031  ++NumFastStores;
2032  MadeChange = true;
2033  } else {
2034  // Check if DeadI overwrites KillingI.
2035  int64_t KillingOffset = 0;
2036  int64_t DeadOffset = 0;
2037  OverwriteResult OR = State.isOverwrite(
2038  KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
2039  if (OR == OW_MaybePartial) {
2040  auto Iter = State.IOLs.insert(
2041  std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
2042  DeadI->getParent(), InstOverlapIntervalsTy()));
2043  auto &IOL = Iter.first->second;
2044  OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
2045  DeadOffset, DeadI, IOL);
2046  }
2047 
2048  if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2049  auto *DeadSI = dyn_cast<StoreInst>(DeadI);
2050  auto *KillingSI = dyn_cast<StoreInst>(KillingI);
2051  // We are re-using tryToMergePartialOverlappingStores, which requires
2052  // DeadSI to dominate KillingSI.
2053  // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
2054  if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
2055  if (Constant *Merged = tryToMergePartialOverlappingStores(
2056  KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
2057  State.BatchAA, &DT)) {
2058 
2059  // Update stored value of earlier store to merged constant.
2060  DeadSI->setOperand(0, Merged);
2061  ++NumModifiedStores;
2062  MadeChange = true;
2063 
2064  Shortend = true;
2065  // Remove killing store and remove any outstanding overlap
2066  // intervals for the updated store.
2067  State.deleteDeadInstruction(KillingSI);
2068  auto I = State.IOLs.find(DeadSI->getParent());
2069  if (I != State.IOLs.end())
2070  I->second.erase(DeadSI);
2071  break;
2072  }
2073  }
2074  }
2075 
2076  if (OR == OW_Complete) {
2077  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2078  << "\n KILLER: " << *KillingI << '\n');
2079  State.deleteDeadInstruction(DeadI);
2080  ++NumFastStores;
2081  MadeChange = true;
2082  }
2083  }
2084  }
2085 
2086  // Check if the store is a no-op.
2087  if (!Shortend && State.storeIsNoop(KillingDef, KillingUndObj)) {
2088  LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
2089  << '\n');
2090  State.deleteDeadInstruction(KillingI);
2091  NumRedundantStores++;
2092  MadeChange = true;
2093  continue;
2094  }
2095 
2096  // Can we form a calloc from a memset/malloc pair?
2097  if (!Shortend && State.tryFoldIntoCalloc(KillingDef, KillingUndObj)) {
2098  LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2099  << " DEAD: " << *KillingI << '\n');
2100  State.deleteDeadInstruction(KillingI);
2101  MadeChange = true;
2102  continue;
2103  }
2104  }
2105 
2106  if (EnablePartialOverwriteTracking)
2107  for (auto &KV : State.IOLs)
2108  MadeChange |= State.removePartiallyOverlappedStores(KV.second);
2109 
2110  MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2111  MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2112  return MadeChange;
2113 }
2114 } // end anonymous namespace
2115 
2116 //===----------------------------------------------------------------------===//
2117 // DSE Pass
2118 //===----------------------------------------------------------------------===//
2119 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2120  AliasAnalysis &AA = AM.getResult<AAManager>(F);
2121  const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2122  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
2123  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2124  PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
2125  AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
2126  LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
2127 
2128  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, AC, TLI, LI);
2129 
2130 #ifdef LLVM_ENABLE_STATS
2131  if (AreStatisticsEnabled())
2132  for (auto &I : instructions(F))
2133  NumRemainingStores += isa<StoreInst>(&I);
2134 #endif
2135 
2136  if (!Changed)
2137  return PreservedAnalyses::all();
2138 
2139  PreservedAnalyses PA;
2140  PA.preserveSet<CFGAnalyses>();
2141  PA.preserve<MemorySSAAnalysis>();
2142  PA.preserve<LoopAnalysis>();
2143  return PA;
2144 }
2145 
2146 namespace {
2147 
2148 /// A legacy pass for the legacy pass manager that wraps \c DSEPass.
2149 class DSELegacyPass : public FunctionPass {
2150 public:
2151  static char ID; // Pass identification, replacement for typeid
2152 
2153  DSELegacyPass() : FunctionPass(ID) {
2154  initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
2155  }
2156 
2157  bool runOnFunction(Function &F) override {
2158  if (skipFunction(F))
2159  return false;
2160 
2161  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2162  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2163  const TargetLibraryInfo &TLI =
2164  getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2165  MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
2166  PostDominatorTree &PDT =
2167  getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
2168  AssumptionCache &AC =
2169  getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
2170  LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2171 
2172  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, AC, TLI, LI);
2173 
2174 #ifdef LLVM_ENABLE_STATS
2175  if (AreStatisticsEnabled())
2176  for (auto &I : instructions(F))
2177  NumRemainingStores += isa<StoreInst>(&I);
2178 #endif
2179 
2180  return Changed;
2181  }
2182 
2183  void getAnalysisUsage(AnalysisUsage &AU) const override {
2184  AU.setPreservesCFG();
2197  }
2198 };
2199 
2200 } // end anonymous namespace
2201 
2202 char DSELegacyPass::ID = 0;
2203 
2204 INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
2205  false)
2215 INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
2216  false)
2217 
2218 FunctionPass *llvm::createDeadStoreEliminationPass() {
2219  return new DSELegacyPass();
2220 }
dse
dse
Definition: DeadStoreElimination.cpp:2215
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::BatchAAResults
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
Definition: AliasAnalysis.h:951
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
AssumptionCache.h
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1303
getPointerSize
static uint64_t getPointerSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, const Function *F)
Definition: DeadStoreElimination.cpp:209
llvm::isAligned
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:146
llvm::EarliestEscapeInfo::removeInstruction
void removeInstruction(Instruction *I)
Definition: BasicAliasAnalysis.cpp:228
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
llvm::AliasResult::PartialAlias
@ PartialAlias
The two locations alias, but only due to a partial overlap.
Definition: AliasAnalysis.h:104
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:35
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::DominatorTreeBase::findNearestCommonDominator
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
Definition: GenericDomTree.h:468
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:740
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:280
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:667
llvm::Type::getInt8PtrTy
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:291
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::User::operands
op_range operands()
Definition: User.h:242
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::PHITransAddr
PHITransAddr - An address value which tracks and handles phi translation.
Definition: PHITransAddr.h:35
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
llvm::MemoryLocation::Ptr
const Value * Ptr
The address of the start of the location.
Definition: MemoryLocation.h:218
Scalar.h
MallocFamily::Malloc
@ Malloc
InstIterator.h
MemorySSAUpdater.h
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:53
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::emitCalloc
Value * emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, const TargetLibraryInfo &TLI)
Emit a call to the calloc function.
Definition: BuildLibCalls.cpp:1871
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
CaptureTracking.h
llvm::EarliestEscapeInfo
Context-sensitive CaptureInfo provider, which computes and caches the earliest common dominator closu...
Definition: AliasAnalysis.h:402
ErrorHandling.h
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
llvm::Function::getEntryBlock
const BasicBlock & getEntryBlock() const
Definition: Function.h:710
llvm::IRBuilder<>
MapVector.h
DeadStoreElimination.h
ValueTracking.h
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
GlobalsModRef.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::PatternMatch::m_Br
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
Definition: PatternMatch.h:1725
llvm::APInt::getBitsSet
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:241
APInt.h
MemorySSASameBBStepCost
static cl::opt< unsigned > MemorySSASameBBStepCost("dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden, cl::desc("The cost of a step in the same basic block as the killing MemoryDef" "(default = 1)"))
llvm::MemoryLocation::Size
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
Definition: MemoryLocation.h:227
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1411
Module.h
MemoryBuiltins.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
llvm::ObjectSizeOpts::NullIsUnknownSize
bool NullIsUnknownSize
If this is true, null pointers in address space 0 will be treated as though they can't be evaluated.
Definition: MemoryBuiltins.h:156
llvm::isBytewiseValue
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
Definition: ValueTracking.cpp:3892
llvm::LoopInfoWrapperPass
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1287
llvm::createDeadStoreEliminationPass
FunctionPass * createDeadStoreEliminationPass()
Definition: DeadStoreElimination.cpp:2218
llvm::Optional
Definition: APInt.h:33
llvm::IntervalMap::empty
bool empty() const
empty - Return true when no intervals are mapped.
Definition: IntervalMap.h:1062
llvm::PHITransAddr::IsPotentiallyPHITranslatable
bool IsPotentiallyPHITranslatable() const
IsPotentiallyPHITranslatable - If this needs PHI translation, return true if we have some hope of doi...
Definition: PHITransAddr.cpp:110
llvm::AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:81
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::Instruction::mayThrow
bool mayThrow() const
Return true if this instruction may throw an exception.
Definition: Instruction.cpp:685
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:654
tryToShortenBegin
static bool tryToShortenBegin(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
Definition: DeadStoreElimination.cpp:601
llvm::LocationSize::isPrecise
bool isPrecise() const
Definition: MemoryLocation.h:166
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
llvm::BatchAAResults::getModRefInfo
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
Definition: AliasAnalysis.h:966
MustExecute.h
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:240
llvm::MemSetBase::getValue
Value * getValue() const
Definition: IntrinsicInst.h:849
llvm::AtomicOrdering::Monotonic
@ Monotonic
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
AliasAnalysis.h
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
Instruction.h
CommandLine.h
CodeMetrics.h
MemorySSAScanLimit
static cl::opt< unsigned > MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 150)"))
llvm::StoreInst::getValueOperand
Value * getValueOperand()
Definition: Instructions.h:386
llvm::AreStatisticsEnabled
bool AreStatisticsEnabled()
Check if statistics are enabled.
Definition: Statistic.cpp:138
llvm::isNoAliasCall
bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
Definition: AliasAnalysis.cpp:962
llvm::MemorySSAWrapperPass
Legacy analysis pass which computes MemorySSA.
Definition: MemorySSA.h:998
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AAResults
Definition: AliasAnalysis.h:511
PostDominators.h
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:76
llvm::MemorySSA::isLiveOnEntryDef
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:751
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
OverlapIntervalsTy
std::map< int64_t, int64_t > OverlapIntervalsTy
Definition: DeadStoreElimination.cpp:173
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:35
InstrTypes.h
llvm::PostDominatorTreeWrapperPass
Definition: PostDominators.h:73
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::LocationSize::getValue
uint64_t getValue() const
Definition: MemoryLocation.h:159
llvm::AliasResult::hasOffset
constexpr bool hasOffset() const
Definition: AliasAnalysis.h:117
TargetLibraryInfo.h
AssumeBundleBuilder.h
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:376
false
Definition: StackSlotColoring.cpp:141
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:294
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:147
llvm::Instruction
Definition: Instruction.h:42
llvm::DebugCounter::shouldExecute
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:74
Elimination
Dead Store Elimination
Definition: DeadStoreElimination.cpp:2215
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:928
llvm::Use::getUser
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
llvm::getUnderlyingObject
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
Definition: ValueTracking.cpp:4455
llvm::CodeMetrics::collectEphemeralValues
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
SmallPtrSet.h
llvm::DominatorTreeBase::roots
iterator_range< root_iterator > roots()
Definition: GenericDomTree.h:304
PatternMatch.h
llvm::MemoryUseOrDef::getMemoryInst
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
Definition: MemorySSA.h:259
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::Instruction::mayWriteToMemory
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
Definition: Instruction.cpp:596
llvm::isModSet
LLVM_NODISCARD bool isModSet(const ModRefInfo MRI)
Definition: AliasAnalysis.h:196
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::getObjectSize
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
Definition: MemoryBuiltins.cpp:559
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false, false) INITIALIZE_PASS_END(DSELegacyPass
llvm::None
const NoneType None
Definition: None.h:24
llvm::MemoryAccess::getBlock
BasicBlock * getBlock() const
Definition: MemorySSA.h:164
memoryIsNotModifiedBetween
static bool memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, BatchAAResults &AA, const DataLayout &DL, DominatorTree *DT)
Returns true if the memory which is accessed by the second instruction is not modified between the fi...
Definition: DeadStoreElimination.cpp:388
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
isShortenableAtTheBeginning
static bool isShortenableAtTheBeginning(Instruction *I)
Returns true if the beginning of this instruction can be safely shortened in length.
Definition: DeadStoreElimination.cpp:203
llvm::MemSetInst
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
Definition: IntrinsicInst.h:989
llvm::AliasResult::getOffset
constexpr int32_t getOffset() const
Definition: AliasAnalysis.h:118
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
LoopInfo.h
llvm::MemorySSAWalker::getClobberingMemoryAccess
MemoryAccess * getClobberingMemoryAccess(const Instruction *I)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1058
BasicBlock.h
llvm::cl::opt< bool >
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:239
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:531
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:297
llvm::getInitialValueOfAllocation
Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
Definition: MemoryBuiltins.cpp:422
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
llvm::MemoryLocation::getOrNone
static Optional< MemoryLocation > getOrNone(const Instruction *Inst)
Definition: MemoryLocation.cpp:76
llvm::BatchAAResults::alias
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
Definition: AliasAnalysis.h:960
llvm::MapVector::find
iterator find(const KeyT &Key)
Definition: MapVector.h:148
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:468
uint64_t
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::MemorySSAUpdater
Definition: MemorySSAUpdater.h:54
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:78
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
llvm::Instruction::isIdenticalTo
bool isIdenticalTo(const Instruction *I) const
Return true if the specified instruction is exactly identical to the current one.
Definition: Instruction.cpp:502
MemoryLocation.h
llvm::DenseMap
Definition: DenseMap.h:716
llvm::MemorySSA
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:714
llvm::LoopInfoBase::getLoopFor
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:986
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::MemorySSA::getMemoryAccess
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:731
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:403
llvm::MemoryDef
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:386
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:364
EnablePartialStoreMerging
static cl::opt< bool > EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE"))
llvm::GetElementPtrInst::CreateInBounds
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Definition: Instructions.h:969
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::RegState::Dead
@ Dead
Unused definition.
Definition: MachineInstrBuilder.h:50
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:335
llvm::TargetLibraryInfo::has
bool has(LibFunc F) const
Tests whether a library function is available.
Definition: TargetLibraryInfo.h:328
MemorySSAOtherBBStepCost
static cl::opt< unsigned > MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5), cl::Hidden, cl::desc("The cost of a step in a different basic " "block than the killing MemoryDef" "(default = 5)"))
isMaskedStoreOverwrite
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, const Instruction *DeadI, BatchAAResults &AA)
Check if two instruction are masked stores that completely overwrite one another.
Definition: DeadStoreElimination.cpp:238
llvm::salvageKnowledge
void salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
Definition: AssumeBundleBuilder.cpp:293
llvm::PHITransAddr::PHITranslateValue
bool PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
PHITranslateValue - PHI translate the current address up the CFG from CurBB to Pred,...
Definition: PHITransAddr.cpp:310
EnablePartialOverwriteTracking
static cl::opt< bool > EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden, cl::desc("Enable partial-overwrite tracking in DSE"))
llvm::MemorySSAAnalysis
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:948
llvm::MemorySSA::getWalker
MemorySSAWalker * getWalker()
Definition: MemorySSA.cpp:1604
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
MemorySSAPathCheckLimit
static cl::opt< unsigned > MemorySSAPathCheckLimit("dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, cl::desc("The maximum number of blocks to check when trying to prove that " "all paths to an exit go through a killing block (default = 50)"))
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition: Local.cpp:395
llvm::LoopInfo
Definition: LoopInfo.h:1102
tryToMergePartialOverlappingStores
static Constant * tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, int64_t KillingOffset, int64_t DeadOffset, const DataLayout &DL, BatchAAResults &AA, DominatorTree *DT)
Definition: DeadStoreElimination.cpp:631
llvm::PointerMayBeCaptured
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
Definition: CaptureTracking.cpp:229
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::IntervalMap::end
const_iterator end() const
Definition: IntervalMap.h:1119
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1624
DataLayout.h
llvm::MemorySSA::getSkipSelfWalker
MemorySSAWalker * getSkipSelfWalker()
Definition: MemorySSA.cpp:1619
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::PostDominatorTree
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Definition: PostDominators.h:28
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::isNotVisibleOnUnwind
bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
Definition: AliasAnalysis.cpp:1012
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:113
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
getParent
static const Function * getParent(const Value *V)
Definition: BasicAliasAnalysis.cpp:845
uint32_t
llvm::AliasResult::NoAlias
@ NoAlias
The two locations do not alias at all.
Definition: AliasAnalysis.h:99
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::AliasResult::MustAlias
@ MustAlias
The two locations precisely alias each other.
Definition: AliasAnalysis.h:106
llvm::ObjectSizeOpts
Various options to control the behavior of getObjectSize.
Definition: MemoryBuiltins.h:136
llvm::DominatorTreeBase::properlyDominates
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Definition: GenericDomTree.h:392
llvm::Instruction::mayReadFromMemory
bool mayReadFromMemory() const
Return true if this instruction may read memory.
Definition: Instruction.cpp:576
llvm::MemoryAccess
Definition: MemorySSA.h:142
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:209
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:682
llvm::MapVector::end
iterator end()
Definition: MapVector.h:72
Argument.h
llvm::APInt::zext
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:973
llvm::MemorySSA::ensureOptimizedUses
void ensureOptimizedUses()
By default, uses are not optimized during MemorySSA construction.
Definition: MemorySSA.cpp:2186
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::Instruction::isAtomic
bool isAtomic() const
Return true if this instruction has an AtomicOrdering of unordered or higher.
Definition: Instruction.cpp:616
llvm::PHITransAddr::getAddr
Value * getAddr() const
Definition: PHITransAddr.h:59
Constant.h
llvm::BatchAAResults::isMustAlias
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
Definition: AliasAnalysis.h:985
tryToShorten
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd)
Definition: DeadStoreElimination.cpp:475
llvm::MemoryDependenceWrapperPass
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDepnedenceResults instance.
Definition: MemoryDependenceAnalysis.h:526
llvm::MemIntrinsicBase::getLength
Value * getLength() const
Definition: IntrinsicInst.h:731
llvm::MemoryUseOrDef::getDefiningAccess
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:262
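For illustration, one upward step in MemorySSA from a store's MemoryDef; MSSA and SI are hypothetical names for a MemorySSA result and a StoreInst.
  // Assumes #include "llvm/Analysis/MemorySSA.h" and "llvm/Support/Casting.h"
  if (auto *Def = llvm::dyn_cast_or_null<llvm::MemoryDef>(MSSA.getMemoryAccess(SI))) {
    llvm::MemoryAccess *Clobber = Def->getDefiningAccess();
    // Clobber is the memory state this store builds on; DSE walks such edges
    // upwards when searching for MemoryDefs it may be able to kill.
  }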
llvm::MemoryDef::setOptimized
void setOptimized(MemoryAccess *MA)
Definition: MemorySSA.h:406
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::initializeDSELegacyPassPass
void initializeDSELegacyPassPass(PassRegistry &)
llvm::DSEPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
Definition: DeadStoreElimination.cpp:2119
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
MemorySSADefsPerBlockLimit
static cl::opt< unsigned > MemorySSADefsPerBlockLimit("dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, cl::desc("The number of MemoryDefs we consider as candidates to eliminate " "other stores per basic block (default = 5000)"))
Casting.h
llvm::post_order
iterator_range< po_iterator< T > > post_order(const T &G)
Definition: PostOrderIterator.h:189
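A sketch of iterating a function's CFG in post order, the traversal order this pass uses when collecting MemoryDefs; F is assumed to be an llvm::Function reference.
  // Assumes #include "llvm/ADT/PostOrderIterator.h" and "llvm/IR/CFG.h"
  for (llvm::BasicBlock *BB : llvm::post_order(&F)) {
    // Each block is visited after its successors (ignoring back edges).
  }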
Function.h
OptimizeMemorySSA
static cl::opt< bool > OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden, cl::desc("Allow DSE to optimize memory accesses."))
DebugCounter.h
PassManager.h
llvm::salvageDebugInfo
void salvageDebugInfo(Instruction &I)
Assuming the instruction I is going to be deleted, attempt to salvage debug users of I by writing the...
Definition: Local.cpp:1739
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::GetPointerBaseWithConstantOffset
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
Definition: ValueTracking.h:278
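A usage sketch; Ptr and DL are hypothetical names for a pointer value and the module's DataLayout.
  // Assumes #include "llvm/Analysis/ValueTracking.h"
  int64_t Offset = 0;
  llvm::Value *Base = llvm::GetPointerBaseWithConstantOffset(Ptr, Offset, DL);
  // On success, Base plus Offset bytes is equivalent to Ptr; if no constant
  // offset can be determined, Base is Ptr itself and Offset stays 0.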
llvm::MemoryLocation::getWithNewPtr
MemoryLocation getWithNewPtr(const Value *NewPtr) const
Definition: MemoryLocation.h:294
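For illustration: deriving a location that keeps the size and AA metadata of an existing location but is based on a different pointer. Loc and NewBasePtr are hypothetical.
  // Assumes #include "llvm/Analysis/MemoryLocation.h"
  llvm::MemoryLocation TranslatedLoc = Loc.getWithNewPtr(NewBasePtr);
  // Same size and AATags as Loc, but described relative to NewBasePtr.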
MemorySSAPartialStoreLimit
static cl::opt< unsigned > MemorySSAPartialStoreLimit("dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, cl::desc("The maximum number of candidates that only partially overwrite the " "killing MemoryDef to consider" " (default = 5)"))
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
llvm::isStrongerThan
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...
Definition: AtomicOrdering.h:90
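A small sketch of the lattice query; the orderings compared are arbitrary examples.
  // Assumes #include "llvm/Support/AtomicOrdering.h"
  using llvm::AtomicOrdering;
  bool B1 = llvm::isStrongerThan(AtomicOrdering::Release,
                                 AtomicOrdering::Monotonic); // true
  bool B2 = llvm::isStrongerThan(AtomicOrdering::Acquire,
                                 AtomicOrdering::Release);   // false: incomparable in the lattice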
AA
DEBUG_COUNTER
DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa", "Controls which MemoryDefs are eliminated.")
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
MemorySSA.h
Instructions.h
PostOrderIterator.h
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:188
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:215
SmallVector.h
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:766
llvm::isRefSet
LLVM_NODISCARD bool isRefSet(const ModRefInfo MRI)
Definition: AliasAnalysis.h:199
tryToShortenEnd
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
Definition: DeadStoreElimination.cpp:574
llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:1388
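A hedged sketch combining m_ICmp with the m_Specific matcher listed above: testing whether a condition compares a particular pointer against a null/zero constant. Cond and Ptr are hypothetical names.
  // Assumes #include "llvm/IR/PatternMatch.h"
  using namespace llvm::PatternMatch;
  llvm::ICmpInst::Predicate Pred;
  if (match(Cond, m_ICmp(Pred, m_Specific(Ptr), m_Zero()))) {
    // Cond is an icmp whose left operand is exactly Ptr and whose right
    // operand is a zero/null constant; Pred receives the comparison predicate.
  }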
Dominators.h
llvm::MemoryLocation::getAfter
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
Definition: MemoryLocation.h:270
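A sketch of building an "everything after this pointer" location, useful when reasoning about accesses to the rest of an object; Ptr is a hypothetical pointer value.
  // Assumes #include "llvm/Analysis/MemoryLocation.h"
  llvm::MemoryLocation AfterLoc = llvm::MemoryLocation::getAfter(Ptr);
  // AfterLoc covers any offset from Ptr up to the end of the underlying object.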
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
llvm::IntervalMap::begin
const_iterator begin() const
Definition: IntervalMap.h:1107
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1351
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:148
llvm::isStrongerThanMonotonic
bool isStrongerThanMonotonic(AtomicOrdering AO)
Definition: AtomicOrdering.h:124
llvm::IntervalMap
Definition: IntervalMap.h:936
llvm::Optional::map
auto map(const Function &F) const & -> Optional< decltype(F(value()))>
Apply a function to the value if present; otherwise return None.
Definition: Optional.h:327
isPartialOverwrite
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, const MemoryLocation &DeadLoc, int64_t KillingOff, int64_t DeadOff, Instruction *DeadI, InstOverlapIntervalsTy &IOL)
Return 'OW_Complete' if a store to the 'KillingLoc' location completely overwrites a store to the 'De...
Definition: DeadStoreElimination.cpp:270
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1461
BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::PostDominatorTreeAnalysis
Analysis pass which computes a PostDominatorTree.
Definition: PostDominators.h:47
deleteDeadInstruction
static void deleteDeadInstruction(Instruction *I)
Definition: LoopIdiomRecognize.cpp:347
llvm::CastInst::CreatePointerCast
static CastInst * CreatePointerCast(Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast instruction.
Definition: Instructions.cpp:3274
llvm::MemoryLocation::getForDest
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
Definition: MemoryLocation.cpp:106
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
MemorySSAUpwardsStepLimit
static cl::opt< unsigned > MemorySSAUpwardsStepLimit("dse-memoryssa-walklimit", cl::init(90), cl::Hidden, cl::desc("The maximum number of steps while walking upwards to find " "MemoryDefs that may be killed (default = 90)"))
llvm::cl::desc
Definition: CommandLine.h:405
raw_ostream.h
llvm::NullPointerIsDefined
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:2047
llvm::PHITransAddr::NeedsPHITranslationFromBlock
bool NeedsPHITranslationFromBlock(BasicBlock *BB) const
NeedsPHITranslationFromBlock - Return true if moving from the specified BasicBlock to its predecessor...
Definition: PHITransAddr.h:63
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::SmallPtrSetImpl::contains
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:389
llvm::PostDominatorTree::dominates
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
Definition: PostDominators.cpp:54
Value.h
InitializePasses.h
isShortenableAtTheEnd
static bool isShortenableAtTheEnd(Instruction *I)
Returns true if the end of this instruction can be safely shortened in length.
Definition: DeadStoreElimination.cpp:178
BuildLibCalls.h
llvm::offsetToAlignment
uint64_t offsetToAlignment(uint64_t Value, Align Alignment)
Returns the offset to the next integer (mod 2**64) that is greater than or equal to Value and is a mu...
Definition: Alignment.h:198
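A worked example of the arithmetic; the values are arbitrary.
  // Assumes #include "llvm/Support/Alignment.h"
  uint64_t Pad = llvm::offsetToAlignment(13, llvm::Align(8));
  // 16 is the next multiple of 8 at or above 13, so Pad == 3.
  uint64_t None = llvm::offsetToAlignment(16, llvm::Align(8));
  // 16 is already a multiple of 8, so None == 0.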
llvm::mayContainIrreducibleControl
bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI)
Definition: MustExecute.cpp:479
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:443
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:210
llvm::LoopAnalysis
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:1262
llvm::isFreeCall
const CallInst * isFreeCall(const Value *I, const TargetLibraryInfo *TLI)
isFreeCall - Returns non-null if the value is a call to the builtin free()
Definition: MemoryBuiltins.cpp:532
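A usage sketch; I is a hypothetical instruction pointer and TLI an available TargetLibraryInfo.
  // Assumes #include "llvm/Analysis/MemoryBuiltins.h"
  if (const llvm::CallInst *Free = llvm::isFreeCall(I, &TLI)) {
    // I is a call to free(); Free->getArgOperand(0) is the pointer being freed.
  }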
SetVector.h
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
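A typical visited-set pattern built on this insert; Worklist and the element names are hypothetical.
  // Assumes #include "llvm/ADT/SmallPtrSet.h"
  llvm::SmallPtrSet<const llvm::Instruction *, 8> Visited;
  for (const llvm::Instruction *I : Worklist) {
    if (!Visited.insert(I).second)
      continue; // I was already in the set; skip re-processing it
    // ... process I exactly once ...
  }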
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38