1 //===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The code below implements dead store elimination using MemorySSA. It uses
10 // the following general approach: given a MemoryDef, walk upwards to find
11 // clobbering MemoryDefs that may be killed by the starting def. Then check
12 // that there are no uses that may read the location of the original MemoryDef
13 // in between both MemoryDefs. A bit more concretely:
14 //
15 // For all MemoryDefs StartDef:
16 // 1. Get the next dominating clobbering MemoryDef (EarlierAccess) by walking
17 // upwards.
18 // 2. Check that there are no reads between EarlierAccess and the StartDef by
19 // checking all uses starting at EarlierAccess and walking until we see
20 // StartDef.
21 // 3. For each found CurrentDef, check that:
22 // 1. There are no barrier instructions between CurrentDef and StartDef (like
23 // throws or stores with ordering constraints).
24 // 2. StartDef is executed whenever CurrentDef is executed.
25 // 3. StartDef completely overwrites CurrentDef.
26 // 4. Erase CurrentDef from the function and MemorySSA.
27 //
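// As a minimal illustration (not from a specific test case), in
//
//   store i32 0, i32* %p   ; CurrentDef, reached by walking upwards
//   store i32 1, i32* %p   ; StartDef (the killing MemoryDef)
//
// the first store is completely overwritten by the second and nothing reads
// %p in between, so steps 1-4 above erase it.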
28 //===----------------------------------------------------------------------===//
29 
31 #include "llvm/ADT/APInt.h"
32 #include "llvm/ADT/DenseMap.h"
33 #include "llvm/ADT/MapVector.h"
35 #include "llvm/ADT/SetVector.h"
36 #include "llvm/ADT/SmallPtrSet.h"
37 #include "llvm/ADT/SmallVector.h"
38 #include "llvm/ADT/Statistic.h"
39 #include "llvm/ADT/StringRef.h"
50 #include "llvm/IR/Argument.h"
51 #include "llvm/IR/BasicBlock.h"
52 #include "llvm/IR/Constant.h"
53 #include "llvm/IR/Constants.h"
54 #include "llvm/IR/DataLayout.h"
55 #include "llvm/IR/Dominators.h"
56 #include "llvm/IR/Function.h"
57 #include "llvm/IR/InstIterator.h"
58 #include "llvm/IR/InstrTypes.h"
59 #include "llvm/IR/Instruction.h"
60 #include "llvm/IR/Instructions.h"
61 #include "llvm/IR/IntrinsicInst.h"
62 #include "llvm/IR/Intrinsics.h"
63 #include "llvm/IR/LLVMContext.h"
64 #include "llvm/IR/Module.h"
65 #include "llvm/IR/PassManager.h"
66 #include "llvm/IR/PatternMatch.h"
67 #include "llvm/IR/Value.h"
68 #include "llvm/InitializePasses.h"
69 #include "llvm/Pass.h"
70 #include "llvm/Support/Casting.h"
72 #include "llvm/Support/Debug.h"
77 #include "llvm/Transforms/Scalar.h"
80 #include <algorithm>
81 #include <cassert>
82 #include <cstddef>
83 #include <cstdint>
84 #include <iterator>
85 #include <map>
86 #include <utility>
87 
88 using namespace llvm;
89 using namespace PatternMatch;
90 
91 #define DEBUG_TYPE "dse"
92 
93 STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
94 STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
95 STATISTIC(NumFastStores, "Number of stores deleted");
96 STATISTIC(NumFastOther, "Number of other instrs removed");
97 STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
98 STATISTIC(NumModifiedStores, "Number of stores modified");
99 STATISTIC(NumCFGChecks, "Number of blocks checked when trying to prove a store dead");
100 STATISTIC(NumCFGTries, "Number of attempts to prove a store dead via the CFG");
101 STATISTIC(NumCFGSuccess, "Number of stores proven dead via the CFG check");
102 STATISTIC(NumGetDomMemoryDefPassed,
103  "Number of times a valid candidate is returned from getDomMemoryDef");
104 STATISTIC(NumDomMemDefChecks,
105  "Number of iterations checking for reads in getDomMemoryDef");
106 
107 DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
108  "Controls which MemoryDefs are eliminated.");
109 
110 static cl::opt<bool>
111 EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
112  cl::init(true), cl::Hidden,
113  cl::desc("Enable partial-overwrite tracking in DSE"));
114 
115 static cl::opt<bool>
116 EnablePartialStoreMerging("enable-dse-partial-store-merging",
117  cl::init(true), cl::Hidden,
118  cl::desc("Enable partial store merging in DSE"));
119 
120 static cl::opt<unsigned>
121  MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
122  cl::desc("The number of memory instructions to scan for "
123  "dead store elimination (default = 150)"));
124 static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
125  "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
126  cl::desc("The maximum number of steps while walking upwards to find "
127  "MemoryDefs that may be killed (default = 90)"));
128 
129 static cl::opt<unsigned> MemorySSAPartialStoreLimit(
130  "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
131  cl::desc("The maximum number of candidates that only partially overwrite the "
132  "killing MemoryDef to consider"
133  " (default = 5)"));
134 
135 static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
136  "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
137  cl::desc("The number of MemoryDefs we consider as candidates to eliminate "
138  "other stores per basic block (default = 5000)"));
139 
140 static cl::opt<unsigned> MemorySSASameBBStepCost(
141  "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
142  cl::desc(
143  "The cost of a step in the same basic block as the killing MemoryDef "
144  "(default = 1)"));
145 
146 static cl::opt<unsigned>
147  MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
148  cl::Hidden,
149  cl::desc("The cost of a step in a different basic "
150  "block than the killing MemoryDef "
151  "(default = 5)"));
152 
153 static cl::opt<unsigned> MemorySSAPathCheckLimit(
154  "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
155  cl::desc("The maximum number of blocks to check when trying to prove that "
156  "all paths to an exit go through a killing block (default = 50)"));
157 
158 //===----------------------------------------------------------------------===//
159 // Helper functions
160 //===----------------------------------------------------------------------===//
161 using OverlapIntervalsTy = std::map<int64_t, int64_t>;
162 using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
163 
164 /// Does this instruction write some memory? This only returns true for things
165 /// that we can analyze with other helpers below.
166 static bool hasAnalyzableMemoryWrite(Instruction *I,
167  const TargetLibraryInfo &TLI) {
168  if (isa<StoreInst>(I))
169  return true;
170  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
171  switch (II->getIntrinsicID()) {
172  default:
173  return false;
174  case Intrinsic::memset:
175  case Intrinsic::memmove:
176  case Intrinsic::memcpy:
177  case Intrinsic::memcpy_inline:
178  case Intrinsic::memcpy_element_unordered_atomic:
179  case Intrinsic::memmove_element_unordered_atomic:
180  case Intrinsic::memset_element_unordered_atomic:
181  case Intrinsic::init_trampoline:
182  case Intrinsic::lifetime_end:
183  case Intrinsic::masked_store:
184  return true;
185  }
186  }
187  if (auto *CB = dyn_cast<CallBase>(I)) {
188  LibFunc LF;
189  if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) {
190  switch (LF) {
191  case LibFunc_strcpy:
192  case LibFunc_strncpy:
193  case LibFunc_strcat:
194  case LibFunc_strncat:
195  return true;
196  default:
197  return false;
198  }
199  }
200  }
201  return false;
202 }
203 
204 /// Return a Location stored to by the specified instruction. If isRemovable
205 /// returns true, this function and getLocForRead completely describe the memory
206 /// operations for this instruction.
207 static MemoryLocation getLocForWrite(Instruction *Inst,
208  const TargetLibraryInfo &TLI) {
209  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
210  return MemoryLocation::get(SI);
211 
212  // memcpy/memmove/memset.
213  if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst))
214  return MemoryLocation::getForDest(MI);
215 
216  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
217  switch (II->getIntrinsicID()) {
218  default:
219  return MemoryLocation(); // Unhandled intrinsic.
220  case Intrinsic::init_trampoline:
221  return MemoryLocation::getAfter(II->getArgOperand(0));
222  case Intrinsic::masked_store:
223  return MemoryLocation::getForArgument(II, 1, TLI);
224  case Intrinsic::lifetime_end: {
225  uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
226  return MemoryLocation(II->getArgOperand(1), Len);
227  }
228  }
229  }
230  if (auto *CB = dyn_cast<CallBase>(Inst))
231  // All the supported TLI functions so far happen to have dest as their
232  // first argument.
233  return MemoryLocation::getAfter(CB->getArgOperand(0));
234  return MemoryLocation();
235 }
236 
237 /// If the value of this instruction and the memory it writes to is unused, may
238 /// we delete this instruction?
239 static bool isRemovable(Instruction *I) {
240  // Don't remove volatile/atomic stores.
241  if (StoreInst *SI = dyn_cast<StoreInst>(I))
242  return SI->isUnordered();
243 
244  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
245  switch (II->getIntrinsicID()) {
246  default: llvm_unreachable("doesn't pass 'hasAnalyzableMemoryWrite' predicate");
247  case Intrinsic::lifetime_end:
248  // Never remove dead lifetime_end's, e.g. because it is followed by a
249  // free.
250  return false;
251  case Intrinsic::init_trampoline:
252  // Always safe to remove init_trampoline.
253  return true;
254  case Intrinsic::memset:
255  case Intrinsic::memmove:
256  case Intrinsic::memcpy:
257  case Intrinsic::memcpy_inline:
258  // Don't remove volatile memory intrinsics.
259  return !cast<MemIntrinsic>(II)->isVolatile();
260  case Intrinsic::memcpy_element_unordered_atomic:
261  case Intrinsic::memmove_element_unordered_atomic:
262  case Intrinsic::memset_element_unordered_atomic:
263  case Intrinsic::masked_store:
264  return true;
265  }
266  }
267 
268  // note: only get here for calls with analyzable writes - i.e. libcalls
269  if (auto *CB = dyn_cast<CallBase>(I))
270  return CB->use_empty();
271 
272  return false;
273 }
274 
275 /// Returns true if the end of this instruction can be safely shortened in
276 /// length.
277 static bool isShortenableAtTheEnd(Instruction *I) {
278  // Don't shorten stores for now
279  if (isa<StoreInst>(I))
280  return false;
281 
282  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
283  switch (II->getIntrinsicID()) {
284  default: return false;
285  case Intrinsic::memset:
286  case Intrinsic::memcpy:
287  case Intrinsic::memcpy_element_unordered_atomic:
288  case Intrinsic::memset_element_unordered_atomic:
289  // Do shorten memory intrinsics.
290  // FIXME: Add memmove if it's also safe to transform.
291  return true;
292  }
293  }
294 
295  // Don't shorten libcalls for now.
296 
297  return false;
298 }
299 
300 /// Returns true if the beginning of this instruction can be safely shortened
301 /// in length.
302 static bool isShortenableAtTheBeginning(Instruction *I) {
303  // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
304  // easily done by offsetting the source address.
305  return isa<AnyMemSetInst>(I);
306 }
307 
308 static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
309  const TargetLibraryInfo &TLI,
310  const Function *F) {
311  uint64_t Size;
312  ObjectSizeOpts Opts;
313  Opts.NullIsUnknownSize = NullPointerIsDefined(F);
314 
315  if (getObjectSize(V, Size, DL, &TLI, Opts))
316  return Size;
317  return MemoryLocation::UnknownSize;
318 }
319 
320 namespace {
321 
322 enum OverwriteResult {
323  OW_Begin,
324  OW_Complete,
325  OW_End,
326  OW_PartialEarlierWithFullLater,
327  OW_MaybePartial,
328  OW_Unknown
329 };
330 
331 } // end anonymous namespace
332 
333 /// Check if two instruction are masked stores that completely
334 /// overwrite one another. More specifically, \p Later has to
335 /// overwrite \p Earlier.
336 static OverwriteResult isMaskedStoreOverwrite(const Instruction *Later,
337  const Instruction *Earlier,
338  BatchAAResults &AA) {
339  const auto *IIL = dyn_cast<IntrinsicInst>(Later);
340  const auto *IIE = dyn_cast<IntrinsicInst>(Earlier);
341  if (IIL == nullptr || IIE == nullptr)
342  return OW_Unknown;
343  if (IIL->getIntrinsicID() != Intrinsic::masked_store ||
344  IIE->getIntrinsicID() != Intrinsic::masked_store)
345  return OW_Unknown;
346  // Pointers.
347  Value *LP = IIL->getArgOperand(1)->stripPointerCasts();
348  Value *EP = IIE->getArgOperand(1)->stripPointerCasts();
349  if (LP != EP && !AA.isMustAlias(LP, EP))
350  return OW_Unknown;
351  // Masks.
352  // TODO: check that Later's mask is a superset of the Earlier's mask.
353  if (IIL->getArgOperand(3) != IIE->getArgOperand(3))
354  return OW_Unknown;
355  return OW_Complete;
356 }
357 
358 /// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI
359 /// instruction) completely overwrites a store to the 'Earlier' location
360 /// (by \p EarlierI instruction).
361 /// Return OW_MaybePartial if \p Later does not completely overwrite
362 /// \p Earlier, but they both write to the same underlying object. In that
363 /// case, use isPartialOverwrite to check if \p Later partially overwrites
364 /// \p Earlier. Returns 'OW_Unknown' if nothing can be determined.
365 static OverwriteResult
366 isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
367  const MemoryLocation &Later, const MemoryLocation &Earlier,
368  const DataLayout &DL, const TargetLibraryInfo &TLI,
369  int64_t &EarlierOff, int64_t &LaterOff, BatchAAResults &AA,
370  const Function *F) {
371  // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
372  // get imprecise values here, though (except for unknown sizes).
373  if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
374  // In case no constant size is known, try to use the IR values for the
375  // number of bytes written and check if they match.
376  const auto *LaterMemI = dyn_cast<MemIntrinsic>(LaterI);
377  const auto *EarlierMemI = dyn_cast<MemIntrinsic>(EarlierI);
378  if (LaterMemI && EarlierMemI) {
379  const Value *LaterV = LaterMemI->getLength();
380  const Value *EarlierV = EarlierMemI->getLength();
381  if (LaterV == EarlierV && AA.isMustAlias(Earlier, Later))
382  return OW_Complete;
383  }
384 
385  // Masked stores have imprecise locations, but we can reason about them
386  // to some extent.
387  return isMaskedStoreOverwrite(LaterI, EarlierI, AA);
388  }
389 
390  const uint64_t LaterSize = Later.Size.getValue();
391  const uint64_t EarlierSize = Earlier.Size.getValue();
392 
393  // Query the alias information
394  AliasResult AAR = AA.alias(Later, Earlier);
395 
396  // If the start pointers are the same, we just have to compare sizes to see if
397  // the later store was larger than the earlier store.
398  if (AAR == AliasResult::MustAlias) {
399  // Make sure that the Later size is >= the Earlier size.
400  if (LaterSize >= EarlierSize)
401  return OW_Complete;
402  }
403 
404  // If we hit a partial alias we may have a full overwrite
405  if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
406  int32_t Off = AAR.getOffset();
407  if (Off >= 0 && (uint64_t)Off + EarlierSize <= LaterSize)
408  return OW_Complete;
409  }
410 
411  // Check to see if the later store is to the entire object (either a global,
412  // an alloca, or a byval/inalloca argument). If so, then it clearly
413  // overwrites any other store to the same object.
414  const Value *P1 = Earlier.Ptr->stripPointerCasts();
415  const Value *P2 = Later.Ptr->stripPointerCasts();
416  const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);
417 
418  // If we can't resolve the same pointers to the same object, then we can't
419  // analyze them at all.
420  if (UO1 != UO2)
421  return OW_Unknown;
422 
423  // If the "Later" store is to a recognizable object, get its size.
424  uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, F);
425  if (ObjectSize != MemoryLocation::UnknownSize)
426  if (ObjectSize == LaterSize && ObjectSize >= EarlierSize)
427  return OW_Complete;
428 
429  // Okay, we have stores to two completely different pointers. Try to
430  // decompose the pointer into a "base + constant_offset" form. If the base
431  // pointers are equal, then we can reason about the two stores.
432  EarlierOff = 0;
433  LaterOff = 0;
434  const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
435  const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);
436 
437  // If the base pointers still differ, we have two completely different stores.
438  if (BP1 != BP2)
439  return OW_Unknown;
440 
441  // The later access completely overlaps the earlier store if and only if
442  // both start and end of the earlier one is "inside" the later one:
443  // |<->|--earlier--|<->|
444  // |-------later-------|
445  // Accesses may overlap if and only if start of one of them is "inside"
446  // another one:
447  // |<->|--earlier--|<----->|
448  // |-------later-------|
449  // OR
450  // |----- earlier -----|
451  // |<->|---later---|<----->|
452  //
453  // We have to be careful here as *Off is signed while *.Size is unsigned.
454 
455  // Check if the earlier access starts "not before" the later one.
456  if (EarlierOff >= LaterOff) {
457  // If the earlier access ends "not after" the later access then the earlier
458  // one is completely overwritten by the later one.
459  if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
460  return OW_Complete;
461  // If start of the earlier access is "before" end of the later access then
462  // accesses overlap.
463  else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize)
464  return OW_MaybePartial;
465  }
466  // If start of the later access is "before" end of the earlier access then
467  // accesses overlap.
468  else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) {
469  return OW_MaybePartial;
470  }
471 
472  // Can reach here only if accesses are known not to overlap. There is no
473  // dedicated code to indicate no overlap so signal "unknown".
474  return OW_Unknown;
475 }
476 
477 /// Return 'OW_Complete' if a store to the 'Later' location completely
478 /// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
479 /// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
480 /// beginning of the 'Earlier' location is overwritten by 'Later'.
481 /// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was
482 /// overwritten by a later (smaller) store which doesn't write outside the big
483 /// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
484 /// NOTE: This function must only be called if both \p Later and \p Earlier
485 /// write to the same underlying object with valid \p EarlierOff and \p
486 /// LaterOff.
487 static OverwriteResult isPartialOverwrite(const MemoryLocation &Later,
488  const MemoryLocation &Earlier,
489  int64_t EarlierOff, int64_t LaterOff,
490  Instruction *DepWrite,
491  InstOverlapIntervalsTy &IOL) {
492  const uint64_t LaterSize = Later.Size.getValue();
493  const uint64_t EarlierSize = Earlier.Size.getValue();
494  // We may now overlap, although the overlap is not complete. There might also
495  // be other incomplete overlaps, and together, they might cover the complete
496  // earlier write.
497  // Note: The correctness of this logic depends on the fact that this function
498  // is never called with DepWrite when there are any intervening reads.
499  if (EnablePartialOverwriteTracking &&
500  LaterOff < int64_t(EarlierOff + EarlierSize) &&
501  int64_t(LaterOff + LaterSize) >= EarlierOff) {
502 
503  // Insert our part of the overlap into the map.
504  auto &IM = IOL[DepWrite];
505  LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff
506  << ", " << int64_t(EarlierOff + EarlierSize)
507  << ") Later [" << LaterOff << ", "
508  << int64_t(LaterOff + LaterSize) << ")\n");
509 
510  // Make sure that we only insert non-overlapping intervals and combine
511  // adjacent intervals. The intervals are stored in the map with the ending
512  // offset as the key (in the half-open sense) and the starting offset as
513  // the value.
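 // For instance, an earlier partial overwrite of bytes [4, 12) of DepWrite is
 // kept as the entry {12 -> 4}; a new store covering [0, 6) is merged with it
 // below into a single entry {12 -> 0}.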
514  int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + LaterSize;
515 
516  // Find any intervals ending at, or after, LaterIntStart which start
517  // before LaterIntEnd.
518  auto ILI = IM.lower_bound(LaterIntStart);
519  if (ILI != IM.end() && ILI->second <= LaterIntEnd) {
520  // This existing interval is overlapped with the current store somewhere
521  // in [LaterIntStart, LaterIntEnd]. Merge them by erasing the existing
522  // intervals and adjusting our start and end.
523  LaterIntStart = std::min(LaterIntStart, ILI->second);
524  LaterIntEnd = std::max(LaterIntEnd, ILI->first);
525  ILI = IM.erase(ILI);
526 
527  // Continue erasing and adjusting our end in case other previous
528  // intervals are also overlapped with the current store.
529  //
530  // |--- earlier 1 ---| |--- earlier 2 ---|
531  // |------- later---------|
532  //
533  while (ILI != IM.end() && ILI->second <= LaterIntEnd) {
534  assert(ILI->second > LaterIntStart && "Unexpected interval");
535  LaterIntEnd = std::max(LaterIntEnd, ILI->first);
536  ILI = IM.erase(ILI);
537  }
538  }
539 
540  IM[LaterIntEnd] = LaterIntStart;
541 
542  ILI = IM.begin();
543  if (ILI->second <= EarlierOff &&
544  ILI->first >= int64_t(EarlierOff + EarlierSize)) {
545  LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier ["
546  << EarlierOff << ", "
547  << int64_t(EarlierOff + EarlierSize)
548  << ") Composite Later [" << ILI->second << ", "
549  << ILI->first << ")\n");
550  ++NumCompletePartials;
551  return OW_Complete;
552  }
553  }
554 
555  // Check for an earlier store which writes to all the memory locations that
556  // the later store writes to.
557  if (EnablePartialStoreMerging && LaterOff >= EarlierOff &&
558  int64_t(EarlierOff + EarlierSize) > LaterOff &&
559  uint64_t(LaterOff - EarlierOff) + LaterSize <= EarlierSize) {
560  LLVM_DEBUG(dbgs() << "DSE: Partial overwrite an earlier load ["
561  << EarlierOff << ", "
562  << int64_t(EarlierOff + EarlierSize)
563  << ") by a later store [" << LaterOff << ", "
564  << int64_t(LaterOff + LaterSize) << ")\n");
565  // TODO: Maybe come up with a better name?
566  return OW_PartialEarlierWithFullLater;
567  }
568 
569  // Another interesting case is if the later store overwrites the end of the
570  // earlier store.
571  //
572  // |--earlier--|
573  // |-- later --|
574  //
575  // In this case we may want to trim the size of earlier to avoid generating
576  // writes to addresses which will definitely be overwritten later
577  if (!EnablePartialOverwriteTracking &&
578  (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + EarlierSize) &&
579  int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize)))
580  return OW_End;
581 
582  // Finally, we also need to check if the later store overwrites the beginning
583  // of the earlier store.
584  //
585  // |--earlier--|
586  // |-- later --|
587  //
588  // In this case we may want to move the destination address and trim the size
589  // of earlier to avoid generating writes to addresses which will definitely
590  // be overwritten later.
591  if (!EnablePartialOverwriteTracking &&
592  (LaterOff <= EarlierOff && int64_t(LaterOff + LaterSize) > EarlierOff)) {
593  assert(int64_t(LaterOff + LaterSize) < int64_t(EarlierOff + EarlierSize) &&
594  "Expect to be handled as OW_Complete");
595  return OW_Begin;
596  }
597  // Otherwise, they don't completely overlap.
598  return OW_Unknown;
599 }
600 
601 /// Returns true if the memory which is accessed by the second instruction is not
602 /// modified between the first and the second instruction.
603 /// Precondition: Second instruction must be dominated by the first
604 /// instruction.
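/// Roughly, the walk below scans backwards from the second instruction over
/// may-write instructions, PHI-translating the queried address into each
/// predecessor block, until the first instruction's block is reached or a
/// potential clobber is found.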
605 static bool
606 memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
607  BatchAAResults &AA, const DataLayout &DL,
608  DominatorTree *DT) {
609  // Do a backwards scan through the CFG from SecondI to FirstI. Look for
610  // instructions which can modify the memory location accessed by SecondI.
611  //
612  // While doing the walk keep track of the address to check. It might be
613  // different in different basic blocks due to PHI translation.
614  using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
615  SmallVector<BlockAddressPair, 16> WorkList;
616  // Keep track of the address we visited each block with. Bail out if we
617  // visit a block with different addresses.
618  DenseMap<BasicBlock *, Value *> Visited;
619 
620  BasicBlock::iterator FirstBBI(FirstI);
621  ++FirstBBI;
622  BasicBlock::iterator SecondBBI(SecondI);
623  BasicBlock *FirstBB = FirstI->getParent();
624  BasicBlock *SecondBB = SecondI->getParent();
625  MemoryLocation MemLoc = MemoryLocation::get(SecondI);
626  auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
627 
628  // Start checking the SecondBB.
629  WorkList.push_back(
630  std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
631  bool isFirstBlock = true;
632 
633  // Check all blocks going backward until we reach the FirstBB.
634  while (!WorkList.empty()) {
635  BlockAddressPair Current = WorkList.pop_back_val();
636  BasicBlock *B = Current.first;
637  PHITransAddr &Addr = Current.second;
638  Value *Ptr = Addr.getAddr();
639 
640  // Ignore instructions before FirstI if this is the FirstBB.
641  BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
642 
643  BasicBlock::iterator EI;
644  if (isFirstBlock) {
645  // Ignore instructions after SecondI if this is the first visit of SecondBB.
646  assert(B == SecondBB && "first block is not the store block");
647  EI = SecondBBI;
648  isFirstBlock = false;
649  } else {
650  // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
651  // In this case we also have to look at instructions after SecondI.
652  EI = B->end();
653  }
654  for (; BI != EI; ++BI) {
655  Instruction *I = &*BI;
656  if (I->mayWriteToMemory() && I != SecondI)
657  if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
658  return false;
659  }
660  if (B != FirstBB) {
661  assert(B != &FirstBB->getParent()->getEntryBlock() &&
662  "Should not hit the entry block because SI must be dominated by LI");
663  for (BasicBlock *Pred : predecessors(B)) {
664  PHITransAddr PredAddr = Addr;
665  if (PredAddr.NeedsPHITranslationFromBlock(B)) {
666  if (!PredAddr.IsPotentiallyPHITranslatable())
667  return false;
668  if (PredAddr.PHITranslateValue(B, Pred, DT, false))
669  return false;
670  }
671  Value *TranslatedPtr = PredAddr.getAddr();
672  auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
673  if (!Inserted.second) {
674  // We already visited this block before. If it was with a different
675  // address - bail out!
676  if (TranslatedPtr != Inserted.first->second)
677  return false;
678  // ... otherwise just skip it.
679  continue;
680  }
681  WorkList.push_back(std::make_pair(Pred, PredAddr));
682  }
683  }
684  }
685  return true;
686 }
687 
688 static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart,
689  uint64_t &EarlierSize, int64_t LaterStart,
690  uint64_t LaterSize, bool IsOverwriteEnd) {
691  auto *EarlierIntrinsic = cast<AnyMemIntrinsic>(EarlierWrite);
692  Align PrefAlign = EarlierIntrinsic->getDestAlign().valueOrOne();
693 
694  // We assume that memset/memcpy operates in chunks of the "largest" native
695  // type size, aligned on the same value. That means the optimal start and size
696  // of memset/memcpy should be a multiple of the preferred alignment of that
697  // type, so there is no sense in trying to reduce the store size any further,
698  // since any "extra" stores come for free anyway.
699  // On the other hand, the maximum alignment we can achieve is limited by the
700  // alignment of the initial store.
701 
702  // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
703  // "largest" native type.
704  // Note: What is the proper way to get that value?
705  // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
706  // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
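 // As a hypothetical example: a 32-byte memset with PrefAlign = 8 whose last
 // 11 bytes ([21, 32)) are overwritten later is only trimmed to 24 bytes (not
 // 21), so the remaining store stays a multiple of the preferred alignment.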
707 
708  int64_t ToRemoveStart = 0;
709  uint64_t ToRemoveSize = 0;
710  // Compute start and size of the region to remove. Make sure 'PrefAlign' is
711  // maintained on the remaining store.
712  if (IsOverwriteEnd) {
713  // Calculate the required adjustment for 'LaterStart' in order to keep the
714  // remaining store size aligned on 'PrefAlign'.
715  uint64_t Off =
716  offsetToAlignment(uint64_t(LaterStart - EarlierStart), PrefAlign);
717  ToRemoveStart = LaterStart + Off;
718  if (EarlierSize <= uint64_t(ToRemoveStart - EarlierStart))
719  return false;
720  ToRemoveSize = EarlierSize - uint64_t(ToRemoveStart - EarlierStart);
721  } else {
722  ToRemoveStart = EarlierStart;
723  assert(LaterSize >= uint64_t(EarlierStart - LaterStart) &&
724  "Not overlapping accesses?");
725  ToRemoveSize = LaterSize - uint64_t(EarlierStart - LaterStart);
726  // Calculate the required adjustment for 'ToRemoveSize' in order to keep the
727  // start of the remaining store aligned on 'PrefAlign'.
728  uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
729  if (Off != 0) {
730  if (ToRemoveSize <= (PrefAlign.value() - Off))
731  return false;
732  ToRemoveSize -= PrefAlign.value() - Off;
733  }
734  assert(isAligned(PrefAlign, ToRemoveSize) &&
735  "Should preserve selected alignment");
736  }
737 
738  assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
739  assert(EarlierSize > ToRemoveSize && "Can't remove more than original size");
740 
741  uint64_t NewSize = EarlierSize - ToRemoveSize;
742  if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(EarlierWrite)) {
743  // When shortening an atomic memory intrinsic, the newly shortened
744  // length must remain an integer multiple of the element size.
745  const uint32_t ElementSize = AMI->getElementSizeInBytes();
746  if (0 != NewSize % ElementSize)
747  return false;
748  }
749 
750  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
751  << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
752  << *EarlierWrite << "\n KILLER [" << ToRemoveStart << ", "
753  << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
754 
755  Value *EarlierWriteLength = EarlierIntrinsic->getLength();
756  Value *TrimmedLength =
757  ConstantInt::get(EarlierWriteLength->getType(), NewSize);
758  EarlierIntrinsic->setLength(TrimmedLength);
759  EarlierIntrinsic->setDestAlignment(PrefAlign);
760 
761  if (!IsOverwriteEnd) {
762  Value *Indices[1] = {
763  ConstantInt::get(EarlierWriteLength->getType(), ToRemoveSize)};
764  GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
765  EarlierIntrinsic->getRawDest()->getType()->getPointerElementType(),
766  EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite);
767  NewDestGEP->setDebugLoc(EarlierIntrinsic->getDebugLoc());
768  EarlierIntrinsic->setDest(NewDestGEP);
769  }
770 
771  // Finally update start and size of earlier access.
772  if (!IsOverwriteEnd)
773  EarlierStart += ToRemoveSize;
774  EarlierSize = NewSize;
775 
776  return true;
777 }
778 
779 static bool tryToShortenEnd(Instruction *EarlierWrite,
780  OverlapIntervalsTy &IntervalMap,
781  int64_t &EarlierStart, uint64_t &EarlierSize) {
782  if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
783  return false;
784 
785  OverlapIntervalsTy::iterator OII = --IntervalMap.end();
786  int64_t LaterStart = OII->second;
787  uint64_t LaterSize = OII->first - LaterStart;
788 
789  assert(OII->first - LaterStart >= 0 && "Size expected to be positive");
790 
791  if (LaterStart > EarlierStart &&
792  // Note: "LaterStart - EarlierStart" is known to be positive due to
793  // preceding check.
794  (uint64_t)(LaterStart - EarlierStart) < EarlierSize &&
795  // Note: "EarlierSize - (uint64_t)(LaterStart - EarlierStart)" is known to
796  // be non negative due to preceding checks.
797  LaterSize >= EarlierSize - (uint64_t)(LaterStart - EarlierStart)) {
798  if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
799  LaterSize, true)) {
800  IntervalMap.erase(OII);
801  return true;
802  }
803  }
804  return false;
805 }
806 
807 static bool tryToShortenBegin(Instruction *EarlierWrite,
808  OverlapIntervalsTy &IntervalMap,
809  int64_t &EarlierStart, uint64_t &EarlierSize) {
810  if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite))
811  return false;
812 
813  OverlapIntervalsTy::iterator OII = IntervalMap.begin();
814  int64_t LaterStart = OII->second;
815  uint64_t LaterSize = OII->first - LaterStart;
816 
817  assert(OII->first - LaterStart >= 0 && "Size expected to be positive");
818 
819  if (LaterStart <= EarlierStart &&
820  // Note: "EarlierStart - LaterStart" is known to be non negative due to
821  // preceding check.
822  LaterSize > (uint64_t)(EarlierStart - LaterStart)) {
823  // Note: "LaterSize - (uint64_t)(EarlierStart - LaterStart)" is known to be
824  // positive due to preceding checks.
825  assert(LaterSize - (uint64_t)(EarlierStart - LaterStart) < EarlierSize &&
826  "Should have been handled as OW_Complete");
827  if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
828  LaterSize, false)) {
829  IntervalMap.erase(OII);
830  return true;
831  }
832  }
833  return false;
834 }
835 
836 static bool removePartiallyOverlappedStores(const DataLayout &DL,
837  InstOverlapIntervalsTy &IOL,
838  const TargetLibraryInfo &TLI) {
839  bool Changed = false;
840  for (auto OI : IOL) {
841  Instruction *EarlierWrite = OI.first;
842  MemoryLocation Loc = getLocForWrite(EarlierWrite, TLI);
843  assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
844 
845  const Value *Ptr = Loc.Ptr->stripPointerCasts();
846  int64_t EarlierStart = 0;
847  uint64_t EarlierSize = Loc.Size.getValue();
848  GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
849  OverlapIntervalsTy &IntervalMap = OI.second;
850  Changed |=
851  tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
852  if (IntervalMap.empty())
853  continue;
854  Changed |=
855  tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
856  }
857  return Changed;
858 }
859 
860 static Constant *tryToMergePartialOverlappingStores(
861  StoreInst *Earlier, StoreInst *Later, int64_t InstWriteOffset,
862  int64_t DepWriteOffset, const DataLayout &DL, BatchAAResults &AA,
863  DominatorTree *DT) {
864 
865  if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
866  DL.typeSizeEqualsStoreSize(Earlier->getValueOperand()->getType()) &&
867  Later && isa<ConstantInt>(Later->getValueOperand()) &&
868  DL.typeSizeEqualsStoreSize(Later->getValueOperand()->getType()) &&
869  memoryIsNotModifiedBetween(Earlier, Later, AA, DL, DT)) {
870  // If the store we find is:
871  // a) partially overwritten by the store to 'Loc'
872  // b) the later store is fully contained in the earlier one and
873  // c) they both have a constant value
874  // d) none of the two stores need padding
875  // Merge the two stores, replacing the earlier store's value with a
876  // merge of both values.
877  // TODO: Deal with other constant types (vectors, etc), and probably
878  // some mem intrinsics (if needed)
879 
880  APInt EarlierValue =
881  cast<ConstantInt>(Earlier->getValueOperand())->getValue();
882  APInt LaterValue = cast<ConstantInt>(Later->getValueOperand())->getValue();
883  unsigned LaterBits = LaterValue.getBitWidth();
884  assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth());
885  LaterValue = LaterValue.zext(EarlierValue.getBitWidth());
886 
887  // Offset of the smaller store inside the larger store
888  unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8;
889  unsigned LShiftAmount = DL.isBigEndian() ? EarlierValue.getBitWidth() -
890  BitOffsetDiff - LaterBits
891  : BitOffsetDiff;
892  APInt Mask = APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount,
893  LShiftAmount + LaterBits);
894  // Clear the bits we'll be replacing, then OR with the smaller
895  // store, shifted appropriately.
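 // E.g. (little endian, hypothetical values): an earlier i32 store of
 // 0xAABBCCDD fully containing a later i16 store of 0x1122 at the same offset
 // gives LShiftAmount = 0, Mask = 0xFFFF and a merged value of 0xAABB1122.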
896  APInt Merged = (EarlierValue & ~Mask) | (LaterValue << LShiftAmount);
897  LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Earlier: " << *Earlier
898  << "\n Later: " << *Later
899  << "\n Merged Value: " << Merged << '\n');
900  return ConstantInt::get(Earlier->getValueOperand()->getType(), Merged);
901  }
902  return nullptr;
903 }
904 
905 namespace {
906 // Returns true if \p I is an intrinsic that does not read or write memory.
907 bool isNoopIntrinsic(Instruction *I) {
908  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
909  switch (II->getIntrinsicID()) {
910  case Intrinsic::lifetime_start:
911  case Intrinsic::lifetime_end:
912  case Intrinsic::invariant_end:
913  case Intrinsic::launder_invariant_group:
914  case Intrinsic::assume:
915  return true;
916  case Intrinsic::dbg_addr:
917  case Intrinsic::dbg_declare:
918  case Intrinsic::dbg_label:
919  case Intrinsic::dbg_value:
920  llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
921  default:
922  return false;
923  }
924  }
925  return false;
926 }
927 
928 // Check if we can ignore \p D for DSE.
929 bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
930  Instruction *DI = D->getMemoryInst();
931  // Calls that only access inaccessible memory cannot read or write any memory
932  // locations we consider for elimination.
933  if (auto *CB = dyn_cast<CallBase>(DI))
934  if (CB->onlyAccessesInaccessibleMemory())
935  return true;
936 
937  // We can eliminate stores to locations not visible to the caller across
938  // throwing instructions.
939  if (DI->mayThrow() && !DefVisibleToCaller)
940  return true;
941 
942  // We can remove dead stores irrespective of the fence and its ordering
943  // (release/acquire/seq_cst). Fences only constrain the ordering of
944  // already visible stores; they do not make a store visible to other
945  // threads. So, skipping over a fence does not change a store from being
946  // dead.
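 // For example (illustrative IR, not from a test case):
 //   store i32 1, i32* %p
 //   fence release
 //   store i32 2, i32* %p
 // The fence does not keep the first store alive; it can still be eliminated
 // if nothing reads %p in between.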
947  if (isa<FenceInst>(DI))
948  return true;
949 
950  // Skip intrinsics that do not really read or modify memory.
951  if (isNoopIntrinsic(D->getMemoryInst()))
952  return true;
953 
954  return false;
955 }
956 
957 struct DSEState {
958  Function &F;
959  AliasAnalysis &AA;
960 
961  /// The single BatchAA instance that is used to cache AA queries. It will
962  /// not be invalidated over the whole run. This is safe, because:
963  /// 1. Only memory writes are removed, so the alias cache for memory
964  /// locations remains valid.
965  /// 2. No new instructions are added (only instructions removed), so cached
966  /// information for a deleted value cannot be accessed by a re-used new
967  /// value pointer.
968  BatchAAResults BatchAA;
969 
970  MemorySSA &MSSA;
971  DominatorTree &DT;
972  PostDominatorTree &PDT;
973  const TargetLibraryInfo &TLI;
974  const DataLayout &DL;
975 
976  // All MemoryDefs that potentially could kill other MemDefs.
977  SmallVector<MemoryDef *, 64> MemDefs;
978  // Any that should be skipped as they are already deleted
979  SmallPtrSet<MemoryAccess *, 4> SkipStores;
980  // Keep track of all of the objects that are invisible to the caller before
981  // the function returns.
982  // SmallPtrSet<const Value *, 16> InvisibleToCallerBeforeRet;
983  DenseMap<const Value *, bool> InvisibleToCallerBeforeRet;
984  // Keep track of all of the objects that are invisible to the caller after
985  // the function returns.
986  DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
987  // Keep track of blocks with throwing instructions not modeled in MemorySSA.
988  SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
989  // Post-order numbers for each basic block. Used to figure out if memory
990  // accesses are executed before another access.
991  DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
992 
993  /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
994  /// basic block.
995  DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
996 
997  DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
998  PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
999  : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
1000  DL(F.getParent()->getDataLayout()) {}
1001 
1002  static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
1003  DominatorTree &DT, PostDominatorTree &PDT,
1004  const TargetLibraryInfo &TLI) {
1005  DSEState State(F, AA, MSSA, DT, PDT, TLI);
1006  // Collect blocks with throwing instructions not modeled in MemorySSA and
1007  // alloc-like objects.
1008  unsigned PO = 0;
1009  for (BasicBlock *BB : post_order(&F)) {
1010  State.PostOrderNumbers[BB] = PO++;
1011  for (Instruction &I : *BB) {
1012  MemoryAccess *MA = MSSA.getMemoryAccess(&I);
1013  if (I.mayThrow() && !MA)
1014  State.ThrowingBlocks.insert(I.getParent());
1015 
1016  auto *MD = dyn_cast_or_null<MemoryDef>(MA);
1017  if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit &&
1018  (State.getLocForWriteEx(&I) || State.isMemTerminatorInst(&I)))
1019  State.MemDefs.push_back(MD);
1020  }
1021  }
1022 
1023  // Treat byval or inalloca arguments the same as allocas: stores to them are
1024  // dead at the end of the function.
1025  for (Argument &AI : F.args())
1026  if (AI.hasPassPointeeByValueCopyAttr()) {
1027  // For byval, the caller doesn't know the address of the allocation.
1028  if (AI.hasByValAttr())
1029  State.InvisibleToCallerBeforeRet.insert({&AI, true});
1030  State.InvisibleToCallerAfterRet.insert({&AI, true});
1031  }
1032 
1033  return State;
1034  }
1035 
1036  bool isInvisibleToCallerAfterRet(const Value *V) {
1037  if (isa<AllocaInst>(V))
1038  return true;
1039  auto I = InvisibleToCallerAfterRet.insert({V, false});
1040  if (I.second) {
1041  if (!isInvisibleToCallerBeforeRet(V)) {
1042  I.first->second = false;
1043  } else {
1044  auto *Inst = dyn_cast<Instruction>(V);
1045  if (Inst && isAllocLikeFn(Inst, &TLI))
1046  I.first->second = !PointerMayBeCaptured(V, true, false);
1047  }
1048  }
1049  return I.first->second;
1050  }
1051 
1052  bool isInvisibleToCallerBeforeRet(const Value *V) {
1053  if (isa<AllocaInst>(V))
1054  return true;
1055  auto I = InvisibleToCallerBeforeRet.insert({V, false});
1056  if (I.second) {
1057  auto *Inst = dyn_cast<Instruction>(V);
1058  if (Inst && isAllocLikeFn(Inst, &TLI))
1059  // NOTE: This could be made more precise by PointerMayBeCapturedBefore
1060  // with the killing MemoryDef. But we refrain from doing so for now to
1061  // limit compile-time and this does not cause any changes to the number
1062  // of stores removed on a large test set in practice.
1063  I.first->second = !PointerMayBeCaptured(V, false, true);
1064  }
1065  return I.first->second;
1066  }
1067 
1068  Optional<MemoryLocation> getLocForWriteEx(Instruction *I) const {
1069  if (!I->mayWriteToMemory())
1070  return None;
1071 
1072  if (auto *MTI = dyn_cast<AnyMemIntrinsic>(I))
1073  return {MemoryLocation::getForDest(MTI)};
1074 
1075  if (auto *CB = dyn_cast<CallBase>(I)) {
1076  // If the functions may write to memory we do not know about, bail out.
1077  if (!CB->onlyAccessesArgMemory() &&
1078  !CB->onlyAccessesInaccessibleMemOrArgMem())
1079  return None;
1080 
1081  LibFunc LF;
1082  if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) {
1083  switch (LF) {
1084  case LibFunc_strcpy:
1085  case LibFunc_strncpy:
1086  case LibFunc_strcat:
1087  case LibFunc_strncat:
1088  return {MemoryLocation::getAfter(CB->getArgOperand(0))};
1089  default:
1090  break;
1091  }
1092  }
1093  switch (CB->getIntrinsicID()) {
1094  case Intrinsic::init_trampoline:
1095  return {MemoryLocation::getAfter(CB->getArgOperand(0))};
1096  case Intrinsic::masked_store:
1097  return {MemoryLocation::getForArgument(CB, 1, TLI)};
1098  default:
1099  break;
1100  }
1101  return None;
1102  }
1103 
1104  return MemoryLocation::getOrNone(I);
1105  }
1106 
1107  /// Returns true if \p UseInst completely overwrites \p DefLoc
1108  /// (stored by \p DefInst).
1109  bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1110  Instruction *UseInst) {
1111  // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1112  // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1113  // MemoryDef.
1114  if (!UseInst->mayWriteToMemory())
1115  return false;
1116 
1117  if (auto *CB = dyn_cast<CallBase>(UseInst))
1118  if (CB->onlyAccessesInaccessibleMemory())
1119  return false;
1120 
1121  int64_t InstWriteOffset, DepWriteOffset;
1122  if (auto CC = getLocForWriteEx(UseInst))
1123  return isOverwrite(UseInst, DefInst, *CC, DefLoc, DL, TLI, DepWriteOffset,
1124  InstWriteOffset, BatchAA, &F) == OW_Complete;
1125  return false;
1126  }
1127 
1128  /// Returns true if \p Def is not read before returning from the function.
1129  bool isWriteAtEndOfFunction(MemoryDef *Def) {
1130  LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1131  << *Def->getMemoryInst()
1132  << ") is at the end of the function\n");
1133 
1134  auto MaybeLoc = getLocForWriteEx(Def->getMemoryInst());
1135  if (!MaybeLoc) {
1136  LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
1137  return false;
1138  }
1139 
1140  SmallVector<MemoryAccess *, 4> WorkList;
1141  SmallPtrSet<MemoryAccess *, 8> Visited;
1142  auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
1143  if (!Visited.insert(Acc).second)
1144  return;
1145  for (Use &U : Acc->uses())
1146  WorkList.push_back(cast<MemoryAccess>(U.getUser()));
1147  };
1148  PushMemUses(Def);
1149  for (unsigned I = 0; I < WorkList.size(); I++) {
1150  if (WorkList.size() >= MemorySSAScanLimit) {
1151  LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1152  return false;
1153  }
1154 
1155  MemoryAccess *UseAccess = WorkList[I];
1156  // Simply adding the users of MemoryPhi to the worklist is not enough,
1157  // because we might miss read clobbers in different iterations of a loop,
1158  // for example.
1159  // TODO: Add support for phi translation to handle the loop case.
1160  if (isa<MemoryPhi>(UseAccess))
1161  return false;
1162 
1163  // TODO: Checking for aliasing is expensive. Consider reducing the amount
1164  // of times this is called and/or caching it.
1165  Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1166  if (isReadClobber(*MaybeLoc, UseInst)) {
1167  LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1168  return false;
1169  }
1170 
1171  if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1172  PushMemUses(UseDef);
1173  }
1174  return true;
1175  }
1176 
1177  /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1178  /// pair with the MemoryLocation terminated by \p I and a boolean flag
1179  /// indicating whether \p I is a free-like call.
1180  Optional<std::pair<MemoryLocation, bool>>
1181  getLocForTerminator(Instruction *I) const {
1182  uint64_t Len;
1183  Value *Ptr;
1184  if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
1185  m_Value(Ptr))))
1186  return {std::make_pair(MemoryLocation(Ptr, Len), false)};
1187 
1188  if (auto *CB = dyn_cast<CallBase>(I)) {
1189  if (isFreeCall(I, &TLI))
1190  return {std::make_pair(MemoryLocation::getAfter(CB->getArgOperand(0)),
1191  true)};
1192  }
1193 
1194  return None;
1195  }
1196 
1197  /// Returns true if \p I is a memory terminator instruction like
1198  /// llvm.lifetime.end or free.
1199  bool isMemTerminatorInst(Instruction *I) const {
1200  IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
1201  return (II && II->getIntrinsicID() == Intrinsic::lifetime_end) ||
1202  isFreeCall(I, &TLI);
1203  }
1204 
1205  /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1206  /// instruction \p AccessI.
1207  bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1208  Instruction *MaybeTerm) {
1209  Optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1210  getLocForTerminator(MaybeTerm);
1211 
1212  if (!MaybeTermLoc)
1213  return false;
1214 
1215  // If the terminator is a free-like call, all accesses to the underlying
1216  // object can be considered terminated.
1217  if (getUnderlyingObject(Loc.Ptr) !=
1218  getUnderlyingObject(MaybeTermLoc->first.Ptr))
1219  return false;
1220 
1221  auto TermLoc = MaybeTermLoc->first;
1222  if (MaybeTermLoc->second) {
1223  const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1224  return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1225  }
1226  int64_t InstWriteOffset, DepWriteOffset;
1227  return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DL, TLI,
1228  DepWriteOffset, InstWriteOffset, BatchAA,
1229  &F) == OW_Complete;
1230  }
1231 
1232  // Returns true if \p UseInst may read from \p DefLoc.
1233  bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1234  if (isNoopIntrinsic(UseInst))
1235  return false;
1236 
1237  // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1238  // treated as read clobber.
1239  if (auto SI = dyn_cast<StoreInst>(UseInst))
1240  return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1241 
1242  if (!UseInst->mayReadFromMemory())
1243  return false;
1244 
1245  if (auto *CB = dyn_cast<CallBase>(UseInst))
1246  if (CB->onlyAccessesInaccessibleMemory())
1247  return false;
1248 
1249  // NOTE: For calls, the number of stores removed could be slightly improved
1250  // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that showed to
1251  // be expensive compared to the benefits in practice. For now, avoid more
1252  // expensive analysis to limit compile-time.
1253  return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1254  }
1255 
1256  /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1257  /// loop. In particular, this guarantees that it only references a single
1258  /// MemoryLocation during execution of the containing function.
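 /// For example, an alloca or a pointer defined in the entry block qualifies,
 /// while a GEP with a non-constant index (e.g. a loop induction variable)
 /// does not, since it may address a different location on every iteration.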
1259  bool IsGuaranteedLoopInvariant(Value *Ptr) {
1260  auto IsGuaranteedLoopInvariantBase = [this](Value *Ptr) {
1261  Ptr = Ptr->stripPointerCasts();
1262  if (auto *I = dyn_cast<Instruction>(Ptr)) {
1263  if (isa<AllocaInst>(Ptr))
1264  return true;
1265 
1266  if (isAllocLikeFn(I, &TLI))
1267  return true;
1268 
1269  return false;
1270  }
1271  return true;
1272  };
1273 
1274  Ptr = Ptr->stripPointerCasts();
1275  if (auto *I = dyn_cast<Instruction>(Ptr)) {
1276  if (I->getParent() == &I->getFunction()->getEntryBlock()) {
1277  return true;
1278  }
1279  }
1280  if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
1281  return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) &&
1282  GEP->hasAllConstantIndices();
1283  }
1284  return IsGuaranteedLoopInvariantBase(Ptr);
1285  }
1286 
1287  // Find a MemoryDef writing to \p DefLoc and dominating \p StartAccess, with
1288  // no read access between them or on any other path to a function exit block
1289  // if \p DefLoc is not accessible after the function returns. If there is no
1290  // such MemoryDef, return None. The returned value may not (completely)
1291  // overwrite \p DefLoc. Currently we bail out when we encounter an aliasing
1292  // MemoryUse (read).
1293  Optional<MemoryAccess *>
1294  getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1295  const MemoryLocation &DefLoc, const Value *DefUO,
1296  unsigned &ScanLimit, unsigned &WalkerStepLimit,
1297  bool IsMemTerm, unsigned &PartialLimit) {
1298  if (ScanLimit == 0 || WalkerStepLimit == 0) {
1299  LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1300  return None;
1301  }
1302 
1303  MemoryAccess *Current = StartAccess;
1304  Instruction *KillingI = KillingDef->getMemoryInst();
1305  bool StepAgain;
1306  LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1307 
1308  // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1309  Optional<MemoryLocation> CurrentLoc;
1310  do {
1311  StepAgain = false;
1312  LLVM_DEBUG({
1313  dbgs() << " visiting " << *Current;
1314  if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1315  dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1316  << ")";
1317  dbgs() << "\n";
1318  });
1319 
1320  // Reached TOP.
1321  if (MSSA.isLiveOnEntryDef(Current)) {
1322  LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1323  return None;
1324  }
1325 
1326  // Cost of a step. Accesses in the same block are more likely to be valid
1327  // candidates for elimination, hence consider them cheaper.
1328  unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1329  ? MemorySSASameBBStepCost
1330  : MemorySSAOtherBBStepCost;
1331  if (WalkerStepLimit <= StepCost) {
1332  LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1333  return None;
1334  }
1335  WalkerStepLimit -= StepCost;
1336 
1337  // Return for MemoryPhis. They cannot be eliminated directly and the
1338  // caller is responsible for traversing them.
1339  if (isa<MemoryPhi>(Current)) {
1340  LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1341  return Current;
1342  }
1343 
1344  // Below, check if CurrentDef is a valid candidate to be eliminated by
1345  // KillingDef. If it is not, check the next candidate.
1346  MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1347  Instruction *CurrentI = CurrentDef->getMemoryInst();
1348 
1349  if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO))) {
1350  StepAgain = true;
1351  Current = CurrentDef->getDefiningAccess();
1352  continue;
1353  }
1354 
1355  // Before we try to remove anything, check for any extra throwing
1356  // instructions that block us from DSEing
1357  if (mayThrowBetween(KillingI, CurrentI, DefUO)) {
1358  LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1359  return None;
1360  }
1361 
1362  // Check for anything that looks like it will be a barrier to further
1363  // removal
1364  if (isDSEBarrier(DefUO, CurrentI)) {
1365  LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1366  return None;
1367  }
1368 
1369  // If Current is known to be on path that reads DefLoc or is a read
1370  // clobber, bail out, as the path is not profitable. We skip this check
1371  // for intrinsic calls, because the code knows how to handle memcpy
1372  // intrinsics.
1373  if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(DefLoc, CurrentI))
1374  return None;
1375 
1376  // Quick check if there are direct uses that are read-clobbers.
1377  if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) {
1378  if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1379  return !MSSA.dominates(StartAccess, UseOrDef) &&
1380  isReadClobber(DefLoc, UseOrDef->getMemoryInst());
1381  return false;
1382  })) {
1383  LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1384  return None;
1385  }
1386 
1387  // If Current cannot be analyzed or is not removable, check the next
1388  // candidate.
1389  if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI)) {
1390  StepAgain = true;
1391  Current = CurrentDef->getDefiningAccess();
1392  continue;
1393  }
1394 
1395  // If Current does not have an analyzable write location, skip it
1396  CurrentLoc = getLocForWriteEx(CurrentI);
1397  if (!CurrentLoc) {
1398  StepAgain = true;
1399  Current = CurrentDef->getDefiningAccess();
1400  continue;
1401  }
1402 
1403  // AliasAnalysis does not account for loops. Limit elimination to
1404  // candidates for which we can guarantee they always store to the same
1405  // memory location and not multiple locations in a loop.
1406  if (Current->getBlock() != KillingDef->getBlock() &&
1407  !IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
1408  StepAgain = true;
1409  Current = CurrentDef->getDefiningAccess();
1410  WalkerStepLimit -= 1;
1411  continue;
1412  }
1413 
1414  if (IsMemTerm) {
1415  // If the killing def is a memory terminator (e.g. lifetime.end), check
1416  // the next candidate if Current does not write to the same
1417  // underlying object as the terminator.
1418  if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1419  StepAgain = true;
1420  Current = CurrentDef->getDefiningAccess();
1421  }
1422  continue;
1423  } else {
1424  int64_t InstWriteOffset, DepWriteOffset;
1425  auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc, DL, TLI,
1426  DepWriteOffset, InstWriteOffset, BatchAA, &F);
1427  // If Current does not write to the same object as KillingDef, check
1428  // the next candidate.
1429  if (OR == OW_Unknown) {
1430  StepAgain = true;
1431  Current = CurrentDef->getDefiningAccess();
1432  } else if (OR == OW_MaybePartial) {
1433  // If KillingDef only partially overwrites Current, check the next
1434  // candidate if the partial step limit is exceeded. This aggressively
1435  // limits the number of candidates for partial store elimination,
1436  // which are less likely to be removable in the end.
1437  if (PartialLimit <= 1) {
1438  StepAgain = true;
1439  Current = CurrentDef->getDefiningAccess();
1440  WalkerStepLimit -= 1;
1441  continue;
1442  }
1443  PartialLimit -= 1;
1444  }
1445  }
1446  } while (StepAgain);
1447 
1448  // Accesses to objects accessible after the function returns can only be
1449  // eliminated if the access is killed along all paths to the exit. Collect
1450  // the blocks with killing (=completely overwriting MemoryDefs) and check if
1451  // they cover all paths from EarlierAccess to any function exit.
1452  SmallPtrSet<Instruction *, 16> KillingDefs;
1453  KillingDefs.insert(KillingDef->getMemoryInst());
1454  MemoryAccess *EarlierAccess = Current;
1455  Instruction *EarlierMemInst =
1456  cast<MemoryDef>(EarlierAccess)->getMemoryInst();
1457  LLVM_DEBUG(dbgs() << " Checking for reads of " << *EarlierAccess << " ("
1458  << *EarlierMemInst << ")\n");
1459 
1460  SetVector<MemoryAccess *> WorkList;
1461  auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
1462  for (Use &U : Acc->uses())
1463  WorkList.insert(cast<MemoryAccess>(U.getUser()));
1464  };
1465  PushMemUses(EarlierAccess);
1466 
1467  // Optimistically collect all accesses for reads. If we do not find any
1468  // read clobbers, add them to the cache.
1469  SmallPtrSet<MemoryAccess *, 16> KnownNoReads;
1470  if (!EarlierMemInst->mayReadFromMemory())
1471  KnownNoReads.insert(EarlierAccess);
1472  // Check if EarlierDef may be read.
1473  for (unsigned I = 0; I < WorkList.size(); I++) {
1474  MemoryAccess *UseAccess = WorkList[I];
1475 
1476  LLVM_DEBUG(dbgs() << " " << *UseAccess);
1477  // Bail out if the number of accesses to check exceeds the scan limit.
1478  if (ScanLimit < (WorkList.size() - I)) {
1479  LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1480  return None;
1481  }
1482  --ScanLimit;
1483  NumDomMemDefChecks++;
1484  KnownNoReads.insert(UseAccess);
1485 
1486  if (isa<MemoryPhi>(UseAccess)) {
1487  if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1488  return DT.properlyDominates(KI->getParent(),
1489  UseAccess->getBlock());
1490  })) {
1491  LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1492  continue;
1493  }
1494  LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1495  PushMemUses(UseAccess);
1496  continue;
1497  }
1498 
1499  Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1500  LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1501 
1502  if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1503  return DT.dominates(KI, UseInst);
1504  })) {
1505  LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1506  continue;
1507  }
1508 
1509  // A memory terminator kills all preceding MemoryDefs and all succeeding
1510  // MemoryAccesses. We do not have to check its users.
1511  if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) {
1512  LLVM_DEBUG(
1513  dbgs()
1514  << " ... skipping, memterminator invalidates following accesses\n");
1515  continue;
1516  }
1517 
1518  if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1519  LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1520  PushMemUses(UseAccess);
1521  continue;
1522  }
1523 
1524  if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) {
1525  LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1526  return None;
1527  }
1528 
1529  // Uses which may read the original MemoryDef mean we cannot eliminate the
1530  // original MD. Stop walk.
1531  if (isReadClobber(*CurrentLoc, UseInst)) {
1532  LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1533  return None;
1534  }
1535 
1536  // For the KillingDef and EarlierAccess we only have to check if they read
1537  // the memory location.
1538  // TODO: It would probably be better to check for self-reads before
1539  // calling the function.
1540  if (KillingDef == UseAccess || EarlierAccess == UseAccess) {
1541  LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1542  continue;
1543  }
1544 
1545  // Check all uses for MemoryDefs, except for defs completely overwriting
1546  // the original location. Note that we have to check the uses of *all*
1547  // MemoryDefs we discover, including non-aliasing ones; otherwise we might
1548  // miss cases like the following
1549  // 1 = Def(LoE) ; <----- EarlierDef stores [0,1]
1550  // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1551  // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1552  // (The Use points to the *first* Def it may alias)
1553  // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1554  // stores [0,1]
1555  if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1556  if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) {
1557  if (!isInvisibleToCallerAfterRet(DefUO) &&
1558  UseAccess != EarlierAccess) {
1559  BasicBlock *MaybeKillingBlock = UseInst->getParent();
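  // Post-order numbers decrease towards the function exits, so this only
  // records complete overwrites in blocks that come after EarlierAccess'
  // block.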
1560  if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1561  PostOrderNumbers.find(EarlierAccess->getBlock())->second) {
1562 
1563  LLVM_DEBUG(dbgs()
1564  << " ... found killing def " << *UseInst << "\n");
1565  KillingDefs.insert(UseInst);
1566  }
1567  }
1568  } else
1569  PushMemUses(UseDef);
1570  }
1571  }
1572 
1573  // For accesses to locations visible after the function returns, make sure
1574  // that the location is killed (=overwritten) along all paths from
1575  // EarlierAccess to the exit.
1576  if (!isInvisibleToCallerAfterRet(DefUO)) {
1577  SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1578  for (Instruction *KD : KillingDefs)
1579  KillingBlocks.insert(KD->getParent());
1580  assert(!KillingBlocks.empty() &&
1581  "Expected at least a single killing block");
1582 
1583  // Find the common post-dominator of all killing blocks.
1584  BasicBlock *CommonPred = *KillingBlocks.begin();
1585  for (auto I = std::next(KillingBlocks.begin()), E = KillingBlocks.end();
1586  I != E; I++) {
1587  if (!CommonPred)
1588  break;
1589  CommonPred = PDT.findNearestCommonDominator(CommonPred, *I);
1590  }
1591 
1592  // If CommonPred is in the set of killing blocks, just check if it
1593  // post-dominates EarlierAccess.
1594  if (KillingBlocks.count(CommonPred)) {
1595  if (PDT.dominates(CommonPred, EarlierAccess->getBlock()))
1596  return {EarlierAccess};
1597  return None;
1598  }
1599 
1600  // If the common post-dominator does not post-dominate EarlierAccess,
1601  // there is a path from EarlierAccess to an exit not going through a
1602  // killing block. Otherwise, walk backwards from the exits to verify that
1603  // every path to EarlierAccess passes through a killing block.
1603  if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) {
1604  SetVector<BasicBlock *> WorkList;
1605 
1606  // If CommonPred is null, there are multiple exits from the function.
1607  // They all have to be added to the worklist.
1608  if (CommonPred)
1609  WorkList.insert(CommonPred);
1610  else
1611  for (BasicBlock *R : PDT.roots())
1612  WorkList.insert(R);
1613 
1614  NumCFGTries++;
1615  // Check if all paths starting from an exit node go through one of the
1616  // killing blocks before reaching EarlierAccess.
1617  for (unsigned I = 0; I < WorkList.size(); I++) {
1618  NumCFGChecks++;
1619  BasicBlock *Current = WorkList[I];
1620  if (KillingBlocks.count(Current))
1621  continue;
1622  if (Current == EarlierAccess->getBlock())
1623  return None;
1624 
1625  // EarlierAccess is reachable from the entry, so we don't have to
1626  // explore unreachable blocks further.
1627  if (!DT.isReachableFromEntry(Current))
1628  continue;
1629 
1630  for (BasicBlock *Pred : predecessors(Current))
1631  WorkList.insert(Pred);
1632 
1633  if (WorkList.size() >= MemorySSAPathCheckLimit)
1634  return None;
1635  }
1636  NumCFGSuccess++;
1637  return {EarlierAccess};
1638  }
1639  return None;
1640  }
1641 
1642  // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is
1643  // potentially dead.
1644  return {EarlierAccess};
1645  }
1646 
1647  // Delete dead memory defs.
1648  void deleteDeadInstruction(Instruction *SI) {
1649  MemorySSAUpdater Updater(&MSSA);
1650  SmallVector<Instruction *, 32> NowDeadInsts;
1651  NowDeadInsts.push_back(SI);
1652  --NumFastOther;
1653 
1654  while (!NowDeadInsts.empty()) {
1655  Instruction *DeadInst = NowDeadInsts.pop_back_val();
1656  ++NumFastOther;
1657 
1658  // Try to preserve debug information attached to the dead instruction.
1659  salvageDebugInfo(*DeadInst);
1660  salvageKnowledge(DeadInst);
1661 
1662  // Remove the Instruction from MSSA.
1663  if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
1664  if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
1665  SkipStores.insert(MD);
1666  }
1667  Updater.removeMemoryAccess(MA);
1668  }
1669 
1670  auto I = IOLs.find(DeadInst->getParent());
1671  if (I != IOLs.end())
1672  I->second.erase(DeadInst);
1673  // Remove its operands
1674  for (Use &O : DeadInst->operands())
1675  if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1676  O = nullptr;
1677  if (isInstructionTriviallyDead(OpI, &TLI))
1678  NowDeadInsts.push_back(OpI);
1679  }
1680 
1681  DeadInst->eraseFromParent();
1682  }
1683  }
1684 
1685  // Check for any extra throws between SI and NI that block DSE. This only
1686  // checks extra may-throwing instructions (those that aren't MemoryDefs).
1687  // MemoryDefs that may throw are handled during the walk from one def to the next.
1688  bool mayThrowBetween(Instruction *SI, Instruction *NI,
1689  const Value *SILocUnd) {
1690  // First see if we can ignore it by using the fact that SI stores to an
1691  // alloca/alloca-like object that is not visible to the caller during
1692  // execution of the function.
1693  if (SILocUnd && isInvisibleToCallerBeforeRet(SILocUnd))
1694  return false;
1695 
1696  if (SI->getParent() == NI->getParent())
1697  return ThrowingBlocks.count(SI->getParent());
1698  return !ThrowingBlocks.empty();
1699  }
1700 
1701  // Check if \p NI acts as a DSE barrier for \p SI. The following instructions
1702  // act as barriers:
1703  // * A memory instruction that may throw and \p SI accesses a non-stack
1704  // object.
1705  // * Atomic stores stronger than monotonic.
1706  bool isDSEBarrier(const Value *SILocUnd, Instruction *NI) {
1707  // If NI may throw it acts as a barrier, unless SI stores to an
1708  // alloca/alloca-like object that does not escape.
1709  if (NI->mayThrow() && !isInvisibleToCallerBeforeRet(SILocUnd))
1710  return true;
1711 
1712  // If NI is an atomic load/store stronger than monotonic, do not try to
1713  // eliminate/reorder it.
1714  if (NI->isAtomic()) {
1715  if (auto *LI = dyn_cast<LoadInst>(NI))
1716  return isStrongerThanMonotonic(LI->getOrdering());
1717  if (auto *SI = dyn_cast<StoreInst>(NI))
1718  return isStrongerThanMonotonic(SI->getOrdering());
1719  if (auto *ARMW = dyn_cast<AtomicRMWInst>(NI))
1720  return isStrongerThanMonotonic(ARMW->getOrdering());
1721  if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(NI))
1722  return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
1723  isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
1724  llvm_unreachable("other instructions should be skipped in MemorySSA");
1725  }
1726  return false;
1727  }
1728 
1729  /// Eliminate writes to objects that are not visible in the caller and are not
1730  /// accessed before returning from the function.
1731  bool eliminateDeadWritesAtEndOfFunction() {
1732  bool MadeChange = false;
1733  LLVM_DEBUG(
1734  dbgs()
1735  << "Trying to eliminate MemoryDefs at the end of the function\n");
1736  for (int I = MemDefs.size() - 1; I >= 0; I--) {
1737  MemoryDef *Def = MemDefs[I];
1738  if (SkipStores.contains(Def) || !isRemovable(Def->getMemoryInst()))
1739  continue;
1740 
1741  Instruction *DefI = Def->getMemoryInst();
1743  auto DefLoc = getLocForWriteEx(DefI);
1744  if (!DefLoc)
1745  continue;
1746 
1747  // NOTE: Currently eliminating writes at the end of a function is limited
1748  // to MemoryDefs with a single underlying object, to save compile-time. In
1749  // practice it appears the case with multiple underlying objects is very
1750  // uncommon. If it turns out to be important, we can use
1751  // getUnderlyingObjects here instead.
1752  const Value *UO = getUnderlyingObject(DefLoc->Ptr);
1753  if (!UO || !isInvisibleToCallerAfterRet(UO))
1754  continue;
1755 
1756  if (isWriteAtEndOfFunction(Def)) {
1757  // See through pointer-to-pointer bitcasts
1758  LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
1759  "of the function\n");
1760  deleteDeadInstruction(DefI);
1761  ++NumFastStores;
1762  MadeChange = true;
1763  }
1764  }
1765  return MadeChange;
1766  }
1767 
1768  /// \returns true if \p Def is a no-op store, either because it
1769  /// directly stores back a loaded value or stores zero to a calloced object.
1770  bool storeIsNoop(MemoryDef *Def, const MemoryLocation &DefLoc,
1771  const Value *DefUO) {
1772  StoreInst *Store = dyn_cast<StoreInst>(Def->getMemoryInst());
1773  if (!Store)
1774  return false;
1775 
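  // A store that just writes back the value it loaded from the same address
  // is a no-op, e.g. (conceptual IR):
  //   %v = load i32, i32* %p
  //   store i32 %v, i32* %p   ; removable if %p is not written in between
  // The walk below uses MemorySSA to check that no other write clobbers the
  // location between the load and the store.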
1776  if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
1777  if (LoadI->getPointerOperand() == Store->getOperand(1)) {
1778  // Get the defining access for the load.
1779  auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
1780  // Fast path: the defining accesses are the same.
1781  if (LoadAccess == Def->getDefiningAccess())
1782  return true;
1783 
1784  // Look through phi accesses. Recursively scan all phi accesses by
1785  // adding them to a worklist. Bail when we run into a memory def that
1786  // does not match LoadAccess.
1787  SetVector<MemoryAccess *> ToCheck;
1788  MemoryAccess *Current =
1789  MSSA.getWalker()->getClobberingMemoryAccess(Def);
1790  // We don't want to bail when we run into the store memory def. But,
1791  // the phi access may point to it. So, pretend like we've already
1792  // checked it.
1793  ToCheck.insert(Def);
1794  ToCheck.insert(Current);
1795  // Start at current (1) to simulate already having checked Def.
1796  for (unsigned I = 1; I < ToCheck.size(); ++I) {
1797  Current = ToCheck[I];
1798  if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
1799  // Check all the operands.
1800  for (auto &Use : PhiAccess->incoming_values())
1801  ToCheck.insert(cast<MemoryAccess>(&Use));
1802  continue;
1803  }
1804 
1805  // If we found a memory def, bail. This happens when we have an
1806  // unrelated write in between an otherwise noop store.
1807  assert(isa<MemoryDef>(Current) &&
1808  "Only MemoryDefs should reach here.");
1809  // TODO: Skip no alias MemoryDefs that have no aliasing reads.
1810  // We are searching for the definition of the store's destination.
1811  // So, if that is the same definition as the load, then this is a
1812  // noop. Otherwise, fail.
1813  if (LoadAccess != Current)
1814  return false;
1815  }
1816  return true;
1817  }
1818  }
1819 
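  // Storing a zero value is also a no-op if the underlying object was
  // allocated by a calloc-like function, which already zero-initializes the
  // memory, provided no other write may have modified the location in
  // between (checked via the clobbering MemoryAccess below).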
1820  Constant *StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
1821  if (StoredConstant && StoredConstant->isNullValue()) {
1822  auto *DefUOInst = dyn_cast<Instruction>(DefUO);
1823  if (DefUOInst && isCallocLikeFn(DefUOInst, &TLI)) {
1824  auto *UnderlyingDef = cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
1825  // If UnderlyingDef is the clobbering access of Def, no instructions
1826  // between them can modify the memory location.
1827  auto *ClobberDef =
1828  MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
1829  return UnderlyingDef == ClobberDef;
1830  }
1831  }
1832  return false;
1833  }
1834 };
1835 
1836 bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
1837  DominatorTree &DT, PostDominatorTree &PDT,
1838  const TargetLibraryInfo &TLI) {
1839  bool MadeChange = false;
1840 
1841  DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI);
1842  // For each store:
1843  for (unsigned I = 0; I < State.MemDefs.size(); I++) {
1844  MemoryDef *KillingDef = State.MemDefs[I];
1845  if (State.SkipStores.count(KillingDef))
1846  continue;
1847  Instruction *SI = KillingDef->getMemoryInst();
1848 
1849  Optional<MemoryLocation> MaybeSILoc;
1850  if (State.isMemTerminatorInst(SI))
1851  MaybeSILoc = State.getLocForTerminator(SI).map(
1852  [](const std::pair<MemoryLocation, bool> &P) { return P.first; });
1853  else
1854  MaybeSILoc = State.getLocForWriteEx(SI);
1855 
1856  if (!MaybeSILoc) {
1857  LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
1858  << *SI << "\n");
1859  continue;
1860  }
1861  MemoryLocation SILoc = *MaybeSILoc;
1862  assert(SILoc.Ptr && "SILoc should not be null");
1863  const Value *SILocUnd = getUnderlyingObject(SILoc.Ptr);
1864 
1865  MemoryAccess *Current = KillingDef;
1866  LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
1867  << *Current << " (" << *SI << ")\n");
1868 
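  // Budgets for the walk: ScanLimit bounds how many MemoryAccesses are
  // checked for reads, WalkerStepLimit bounds the number of upwards
  // def-to-def steps, and PartialLimit bounds how many partially
  // overwriting candidates are considered.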
1869  unsigned ScanLimit = MemorySSAScanLimit;
1870  unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
1871  unsigned PartialLimit = MemorySSAPartialStoreLimit;
1872  // Worklist of MemoryAccesses that may be killed by KillingDef.
1873  SetVector<MemoryAccess *> ToCheck;
1874 
1875  if (SILocUnd)
1876  ToCheck.insert(KillingDef->getDefiningAccess());
1877 
1878  bool Shortend = false;
1879  bool IsMemTerm = State.isMemTerminatorInst(SI);
1880  // Check if MemoryAccesses in the worklist are killed by KillingDef.
1881  for (unsigned I = 0; I < ToCheck.size(); I++) {
1882  Current = ToCheck[I];
1883  if (State.SkipStores.count(Current))
1884  continue;
1885 
1886  Optional<MemoryAccess *> Next = State.getDomMemoryDef(
1887  KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit,
1888  IsMemTerm, PartialLimit);
1889 
1890  if (!Next) {
1891  LLVM_DEBUG(dbgs() << " finished walk\n");
1892  continue;
1893  }
1894 
1895  MemoryAccess *EarlierAccess = *Next;
1896  LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess);
1897  if (isa<MemoryPhi>(EarlierAccess)) {
1898  LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
1899  for (Value *V : cast<MemoryPhi>(EarlierAccess)->incoming_values()) {
1900  MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
1901  BasicBlock *IncomingBlock = IncomingAccess->getBlock();
1902  BasicBlock *PhiBlock = EarlierAccess->getBlock();
1903 
1904  // We only consider incoming MemoryAccesses that come before the
1905  // MemoryPhi. Otherwise we could discover candidates that do not
1906  // strictly dominate our starting def.
1907  if (State.PostOrderNumbers[IncomingBlock] >
1908  State.PostOrderNumbers[PhiBlock])
1909  ToCheck.insert(IncomingAccess);
1910  }
1911  continue;
1912  }
1913  auto *NextDef = cast<MemoryDef>(EarlierAccess);
1914  Instruction *NI = NextDef->getMemoryInst();
1915  LLVM_DEBUG(dbgs() << " (" << *NI << ")\n");
1916  ToCheck.insert(NextDef->getDefiningAccess());
1917  NumGetDomMemoryDefPassed++;
1918 
1919  if (!DebugCounter::shouldExecute(MemorySSACounter))
1920  continue;
1921 
1922  MemoryLocation NILoc = *State.getLocForWriteEx(NI);
1923 
1924  if (IsMemTerm) {
1925  const Value *NIUnd = getUnderlyingObject(NILoc.Ptr);
1926  if (SILocUnd != NIUnd)
1927  continue;
1928  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI
1929  << "\n KILLER: " << *SI << '\n');
1930  State.deleteDeadInstruction(NI);
1931  ++NumFastStores;
1932  MadeChange = true;
1933  } else {
1934  // Check if NI overwrites SI.
1935  int64_t InstWriteOffset, DepWriteOffset;
1936  OverwriteResult OR =
1937  isOverwrite(SI, NI, SILoc, NILoc, State.DL, TLI, DepWriteOffset,
1938  InstWriteOffset, State.BatchAA, &F);
1939  if (OR == OW_MaybePartial) {
1940  auto Iter = State.IOLs.insert(
1941  std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
1942  NI->getParent(), InstOverlapIntervalsTy()));
1943  auto &IOL = Iter.first->second;
1944  OR = isPartialOverwrite(SILoc, NILoc, DepWriteOffset, InstWriteOffset,
1945  NI, IOL);
1946  }
1947 
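  // Partial store merging: if the later store writes a constant into a
  // sub-range of an earlier constant store, the two constants can be merged
  // into the earlier store, which makes the later store dead (see
  // tryToMergePartialOverlappingStores).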
1948  if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
1949  auto *Earlier = dyn_cast<StoreInst>(NI);
1950  auto *Later = dyn_cast<StoreInst>(SI);
1951  // We are re-using tryToMergePartialOverlappingStores, which requires
1952  // Earlier to dominate Later.
1953  // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
1954  if (Earlier && Later && DT.dominates(Earlier, Later)) {
1955  if (Constant *Merged = tryToMergePartialOverlappingStores(
1956  Earlier, Later, InstWriteOffset, DepWriteOffset, State.DL,
1957  State.BatchAA, &DT)) {
1958 
1959  // Update stored value of earlier store to merged constant.
1960  Earlier->setOperand(0, Merged);
1961  ++NumModifiedStores;
1962  MadeChange = true;
1963 
1964  Shortend = true;
1965  // Remove later store and remove any outstanding overlap intervals
1966  // for the updated store.
1967  State.deleteDeadInstruction(Later);
1968  auto I = State.IOLs.find(Earlier->getParent());
1969  if (I != State.IOLs.end())
1970  I->second.erase(Earlier);
1971  break;
1972  }
1973  }
1974  }
1975 
1976  if (OR == OW_Complete) {
1977  LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI
1978  << "\n KILLER: " << *SI << '\n');
1979  State.deleteDeadInstruction(NI);
1980  ++NumFastStores;
1981  MadeChange = true;
1982  }
1983  }
1984  }
1985 
1986  // Check if the store is a no-op.
1987  if (!Shortend && isRemovable(SI) &&
1988  State.storeIsNoop(KillingDef, SILoc, SILocUnd)) {
1989  LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *SI << '\n');
1990  State.deleteDeadInstruction(SI);
1991  NumRedundantStores++;
1992  MadeChange = true;
1993  continue;
1994  }
1995  }
1996 
1997  if (EnablePartialOverwriteTracking)
1998  for (auto &KV : State.IOLs)
1999  MadeChange |= removePartiallyOverlappedStores(State.DL, KV.second, TLI);
2000 
2001  MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2002  return MadeChange;
2003 }
2004 } // end anonymous namespace
2005 
2006 //===----------------------------------------------------------------------===//
2007 // DSE Pass
2008 //===----------------------------------------------------------------------===//
2009 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2010  AliasAnalysis &AA = AM.getResult<AAManager>(F);
2011  const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2012  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
2013  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2014  PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
2015 
2016  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI);
2017 
2018 #ifdef LLVM_ENABLE_STATS
2019  if (AreStatisticsEnabled())
2020  for (auto &I : instructions(F))
2021  NumRemainingStores += isa<StoreInst>(&I);
2022 #endif
2023 
2024  if (!Changed)
2025  return PreservedAnalyses::all();
2026 
2027  PreservedAnalyses PA;
2028  PA.preserveSet<CFGAnalyses>();
2029  PA.preserve<GlobalsAA>();
2030  PA.preserve<MemorySSAAnalysis>();
2031  return PA;
2032 }
2033 
2034 namespace {
2035 
2036 /// A legacy pass for the legacy pass manager that wraps \c DSEPass.
2037 class DSELegacyPass : public FunctionPass {
2038 public:
2039  static char ID; // Pass identification, replacement for typeid
2040 
2041  DSELegacyPass() : FunctionPass(ID) {
2042  initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
2043  }
2044 
2045  bool runOnFunction(Function &F) override {
2046  if (skipFunction(F))
2047  return false;
2048 
2049  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2050  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2051  const TargetLibraryInfo &TLI =
2052  getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2053  MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
2054  PostDominatorTree &PDT =
2055  getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
2056 
2057  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI);
2058 
2059 #ifdef LLVM_ENABLE_STATS
2060  if (AreStatisticsEnabled())
2061  for (auto &I : instructions(F))
2062  NumRemainingStores += isa<StoreInst>(&I);
2063 #endif
2064 
2065  return Changed;
2066  }
2067 
2068  void getAnalysisUsage(AnalysisUsage &AU) const override {
2069  AU.setPreservesCFG();
2070  AU.addRequired<AAResultsWrapperPass>();
2071  AU.addRequired<TargetLibraryInfoWrapperPass>();
2072  AU.addPreserved<GlobalsAAWrapperPass>();
2073  AU.addRequired<DominatorTreeWrapperPass>();
2074  AU.addPreserved<DominatorTreeWrapperPass>();
2075  AU.addRequired<PostDominatorTreeWrapperPass>();
2076  AU.addRequired<MemorySSAWrapperPass>();
2077  AU.addPreserved<PostDominatorTreeWrapperPass>();
2078  AU.addPreserved<MemorySSAWrapperPass>();
2079  }
2080 };
2081 
2082 } // end anonymous namespace
2083 
2084 char DSELegacyPass::ID = 0;
2085 
2086 INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
2087  false)
2088 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2089 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
2090 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2091 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2092 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
2093 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
2094 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
2095 INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
2096  false)
2097 
2098 FunctionPass *llvm::createDeadStoreEliminationPass() {
2099  return new DSELegacyPass();
2100 }
Definition: TargetTransformInfo.h:38