1//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The code below implements dead store elimination using MemorySSA. It uses
10// the following general approach: given a MemoryDef, walk upwards to find
11// clobbering MemoryDefs that may be killed by the starting def. Then check
12// that there are no uses that may read the location of the original MemoryDef
13// in between both MemoryDefs. A bit more concretely:
14//
15// For all MemoryDefs StartDef:
16// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17// upwards.
18// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19// checking all uses starting at MaybeDeadAccess and walking until we see
20// StartDef.
21// 3. For each found CurrentDef, check that:
22// 1. There are no barrier instructions between CurrentDef and StartDef (like
23// throws or stores with ordering constraints).
24// 2. StartDef is executed whenever CurrentDef is executed.
25// 3. StartDef completely overwrites CurrentDef.
26// 4. Erase CurrentDef from the function and MemorySSA.
27//
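// For illustration only (example IR added for exposition; not upstream text):
// in the simplest case the pass rewrites
//
//   store i32 1, ptr %p   ; dead: %p is not read before the next store
//   store i32 2, ptr %p
//
// into a single "store i32 2, ptr %p".
//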
28//===----------------------------------------------------------------------===//
29
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/MapVector.h"
35#include "llvm/ADT/SetVector.h"
38#include "llvm/ADT/Statistic.h"
39#include "llvm/ADT/StringRef.h"
54#include "llvm/IR/Argument.h"
55#include "llvm/IR/BasicBlock.h"
56#include "llvm/IR/Constant.h"
57#include "llvm/IR/Constants.h"
58#include "llvm/IR/DataLayout.h"
59#include "llvm/IR/DebugInfo.h"
60#include "llvm/IR/Dominators.h"
61#include "llvm/IR/Function.h"
62#include "llvm/IR/IRBuilder.h"
64#include "llvm/IR/InstrTypes.h"
65#include "llvm/IR/Instruction.h"
68#include "llvm/IR/Module.h"
69#include "llvm/IR/PassManager.h"
71#include "llvm/IR/Value.h"
73#include "llvm/Pass.h"
76#include "llvm/Support/Debug.h"
84#include <algorithm>
85#include <cassert>
86#include <cstdint>
87#include <iterator>
88#include <map>
89#include <optional>
90#include <utility>
91
92using namespace llvm;
93using namespace PatternMatch;
94
95#define DEBUG_TYPE "dse"
96
97STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
98STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
99STATISTIC(NumFastStores, "Number of stores deleted");
100STATISTIC(NumFastOther, "Number of other instrs removed");
101STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
102STATISTIC(NumModifiedStores, "Number of stores modified");
103STATISTIC(NumCFGChecks, "Number of blocks checked when proving a store dead");
104STATISTIC(NumCFGTries, "Number of CFG walks attempted to prove a store dead");
105STATISTIC(NumCFGSuccess, "Number of CFG walks that proved a store dead");
106STATISTIC(NumGetDomMemoryDefPassed,
107 "Number of times a valid candidate is returned from getDomMemoryDef");
108STATISTIC(NumDomMemDefChecks,
109 "Number iterations check for reads in getDomMemoryDef");
110
111DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
112 "Controls which MemoryDefs are eliminated.");
113
114static cl::opt<bool>
115EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
116 cl::init(true), cl::Hidden,
117 cl::desc("Enable partial-overwrite tracking in DSE"));
118
119static cl::opt<bool>
120EnablePartialStoreMerging("enable-dse-partial-store-merging",
121 cl::init(true), cl::Hidden,
122 cl::desc("Enable partial store merging in DSE"));
123
124static cl::opt<unsigned>
125 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
126 cl::desc("The number of memory instructions to scan for "
127 "dead store elimination (default = 150)"));
129 "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
130 cl::desc("The maximum number of steps while walking upwards to find "
131 "MemoryDefs that may be killed (default = 90)"));
132
134 "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
135 cl::desc("The maximum number candidates that only partially overwrite the "
136 "killing MemoryDef to consider"
137 " (default = 5)"));
138
140 "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
141 cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
142 "other stores per basic block (default = 5000)"));
143
145 "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
146 cl::desc(
147 "The cost of a step in the same basic block as the killing MemoryDef"
148 "(default = 1)"));
149
150static cl::opt<unsigned>
151 MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
152 cl::Hidden,
153 cl::desc("The cost of a step in a different basic "
154 "block than the killing MemoryDef "
155 "(default = 5)"));
156
158 "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
159 cl::desc("The maximum number of blocks to check when trying to prove that "
160 "all paths to an exit go through a killing block (default = 50)"));
161
162// This flag allows or disallows DSE to optimize MemorySSA during its
163// traversal. Note that DSE optimizing MemorySSA may impact other passes
164// downstream of the DSE invocation and can lead to issues not being
165// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
166// those cases, the flag can be used to check if DSE's MemorySSA optimizations
167// impact follow-up passes.
168static cl::opt<bool>
169 OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
170 cl::desc("Allow DSE to optimize memory accesses."));
171
172//===----------------------------------------------------------------------===//
173// Helper functions
174//===----------------------------------------------------------------------===//
175using OverlapIntervalsTy = std::map<int64_t, int64_t>;
176using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
177
178/// Returns true if the end of this instruction can be safely shortened in
179/// length.
180static bool isShortenableAtTheEnd(Instruction *I) {
181 // Don't shorten stores for now
182 if (isa<StoreInst>(I))
183 return false;
184
185 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
186 switch (II->getIntrinsicID()) {
187 default: return false;
188 case Intrinsic::memset:
189 case Intrinsic::memcpy:
190 case Intrinsic::memcpy_element_unordered_atomic:
191 case Intrinsic::memset_element_unordered_atomic:
192 // Do shorten memory intrinsics.
193 // FIXME: Add memmove if it's also safe to transform.
194 return true;
195 }
196 }
197
198 // Don't shorten libcalls for now.
199
200 return false;
201}
202
203/// Returns true if the beginning of this instruction can be safely shortened
204/// in length.
205static bool isShortenableAtTheBeginning(Instruction *I) {
206 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
207 // easily done by offsetting the source address.
208 return isa<AnyMemSetInst>(I);
209}
210
211static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
212 const TargetLibraryInfo &TLI,
213 const Function *F) {
214 uint64_t Size;
215 ObjectSizeOpts Opts;
216 Opts.NullIsUnknownSize = NullPointerIsDefined(F);
217
218 if (getObjectSize(V, Size, DL, &TLI, Opts))
219 return Size;
220 return MemoryLocation::UnknownSize;
221}
222
223namespace {
224
225enum OverwriteResult {
226 OW_Begin,
227 OW_Complete,
228 OW_End,
229 OW_PartialEarlierWithFullLater,
230 OW_MaybePartial,
231 OW_None,
232 OW_Unknown
233};
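// Illustrative byte-interval examples (added for exposition; not upstream
// text):
//   killing [0, 8) vs. dead [0, 4) -> OW_Complete
//   killing [4, 8) vs. dead [0, 8) -> OW_End
//   killing [0, 4) vs. dead [2, 8) -> OW_Begin
//   killing [2, 6) vs. dead [0, 8) -> OW_PartialEarlierWithFullLater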
234
235} // end anonymous namespace
236
237/// Check if two instructions are masked stores that completely
238/// overwrite one another. More specifically, \p KillingI has to
239/// overwrite \p DeadI.
240static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
241 const Instruction *DeadI,
242 BatchAAResults &AA) {
243 const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
244 const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
245 if (KillingII == nullptr || DeadII == nullptr)
246 return OW_Unknown;
247 if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
248 return OW_Unknown;
249 if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
250 // Type size.
251 VectorType *KillingTy =
252 cast<VectorType>(KillingII->getArgOperand(0)->getType());
253 VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType());
254 if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
255 return OW_Unknown;
256 // Element count.
257 if (KillingTy->getElementCount() != DeadTy->getElementCount())
258 return OW_Unknown;
259 // Pointers.
260 Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
261 Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
262 if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
263 return OW_Unknown;
264 // Masks.
265 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
266 if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
267 return OW_Unknown;
268 return OW_Complete;
269 }
270 return OW_Unknown;
271}
272
273/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
274/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
275/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
276/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
277/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
278/// overwritten by a killing (smaller) store which doesn't write outside the big
279/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
280/// NOTE: This function must only be called if both \p KillingLoc and \p
281/// DeadLoc belong to the same underlying object with valid \p KillingOff and
282/// \p DeadOff.
283static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
284 const MemoryLocation &DeadLoc,
285 int64_t KillingOff, int64_t DeadOff,
286 Instruction *DeadI,
287 InstOverlapIntervalsTy &IOL) {
288 const uint64_t KillingSize = KillingLoc.Size.getValue();
289 const uint64_t DeadSize = DeadLoc.Size.getValue();
290 // We may now overlap, although the overlap is not complete. There might also
291 // be other incomplete overlaps, and together, they might cover the complete
292 // dead store.
293 // Note: The correctness of this logic depends on the fact that this function
294 // is never called when there are any intervening reads.
295 if (EnablePartialOverwriteTracking &&
296 KillingOff < int64_t(DeadOff + DeadSize) &&
297 int64_t(KillingOff + KillingSize) >= DeadOff) {
298
299 // Insert our part of the overlap into the map.
300 auto &IM = IOL[DeadI];
301 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
302 << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
303 << KillingOff << ", " << int64_t(KillingOff + KillingSize)
304 << ")\n");
305
306 // Make sure that we only insert non-overlapping intervals and combine
307 // adjacent intervals. The intervals are stored in the map with the ending
308 // offset as the key (in the half-open sense) and the starting offset as
309 // the value.
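    // For example (illustrative values): a killing write of bytes [4, 12) is
    // stored as IM[12] = 4, and inserting the interval [6, 13) into a map that
    // already holds {8 -> 4, 16 -> 12} merges everything into {16 -> 4}.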
310 int64_t KillingIntStart = KillingOff;
311 int64_t KillingIntEnd = KillingOff + KillingSize;
312
313 // Find any intervals ending at, or after, KillingIntStart which start
314 // before KillingIntEnd.
315 auto ILI = IM.lower_bound(KillingIntStart);
316 if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
317 // This existing interval is overlapped with the current store somewhere
318 // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
319 // intervals and adjusting our start and end.
320 KillingIntStart = std::min(KillingIntStart, ILI->second);
321 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
322 ILI = IM.erase(ILI);
323
324 // Continue erasing and adjusting our end in case other previous
325 // intervals are also overlapped with the current store.
326 //
327 // |--- dead 1 ---| |--- dead 2 ---|
328 // |------- killing---------|
329 //
330 while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
331 assert(ILI->second > KillingIntStart && "Unexpected interval");
332 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
333 ILI = IM.erase(ILI);
334 }
335 }
336
337 IM[KillingIntEnd] = KillingIntStart;
338
339 ILI = IM.begin();
340 if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
341 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
342 << DeadOff << ", " << int64_t(DeadOff + DeadSize)
343 << ") Composite KillingLoc [" << ILI->second << ", "
344 << ILI->first << ")\n");
345 ++NumCompletePartials;
346 return OW_Complete;
347 }
348 }
349
350 // Check for a dead store which writes to all the memory locations that
351 // the killing store writes to.
352 if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
353 int64_t(DeadOff + DeadSize) > KillingOff &&
354 uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
355 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite of a dead store [" << DeadOff
356 << ", " << int64_t(DeadOff + DeadSize)
357 << ") by a killing store [" << KillingOff << ", "
358 << int64_t(KillingOff + KillingSize) << ")\n");
359 // TODO: Maybe come up with a better name?
360 return OW_PartialEarlierWithFullLater;
361 }
362
363 // Another interesting case is if the killing store overwrites the end of the
364 // dead store.
365 //
366 // |--dead--|
367 // |-- killing --|
368 //
369 // In this case we may want to trim the size of the dead store to avoid
370 // generating stores to addresses which will definitely be overwritten by
371 // the killing store.
372 if (!EnablePartialOverwriteTracking &&
373 (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
374 int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
375 return OW_End;
376
377 // Finally, we also need to check if the killing store overwrites the
378 // beginning of the dead store.
379 //
380 // |--dead--|
381 // |-- killing --|
382 //
383 // In this case we may want to move the destination address and trim the size
384 // of the dead store to avoid generating stores to addresses which will
385 // definitely be overwritten by the killing store.
386 if (!EnablePartialOverwriteTracking &&
387 (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
388 assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
389 "Expect to be handled as OW_Complete");
390 return OW_Begin;
391 }
392 // Otherwise, they don't completely overlap.
393 return OW_Unknown;
394}
395
396/// Returns true if the memory which is accessed by the second instruction is not
397/// modified between the first and the second instruction.
398/// Precondition: Second instruction must be dominated by the first
399/// instruction.
400static bool
401memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
402 BatchAAResults &AA, const DataLayout &DL,
403 DominatorTree *DT) {
404 // Do a backwards scan through the CFG from SecondI to FirstI. Look for
405 // instructions which can modify the memory location accessed by SecondI.
406 //
407 // While doing the walk keep track of the address to check. It might be
408 // different in different basic blocks due to PHI translation.
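  // For example (illustrative IR): if the address is
  //   %p = phi ptr [ %a, %bb1 ], [ %b, %bb2 ]
  // the walk checks %a while scanning %bb1 and %b while scanning %bb2.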
409 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
410 SmallVector<BlockAddressPair, 16> WorkList;
411 // Keep track of the address we visited each block with. Bail out if we
412 // visit a block with different addresses.
413 DenseMap<BasicBlock *, Value *> Visited;
414
415 BasicBlock::iterator FirstBBI(FirstI);
416 ++FirstBBI;
417 BasicBlock::iterator SecondBBI(SecondI);
418 BasicBlock *FirstBB = FirstI->getParent();
419 BasicBlock *SecondBB = SecondI->getParent();
420 MemoryLocation MemLoc;
421 if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
422 MemLoc = MemoryLocation::getForDest(MemSet);
423 else
424 MemLoc = MemoryLocation::get(SecondI);
425
426 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
427
428 // Start checking the SecondBB.
429 WorkList.push_back(
430 std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
431 bool isFirstBlock = true;
432
433 // Check all blocks going backward until we reach the FirstBB.
434 while (!WorkList.empty()) {
435 BlockAddressPair Current = WorkList.pop_back_val();
436 BasicBlock *B = Current.first;
437 PHITransAddr &Addr = Current.second;
438 Value *Ptr = Addr.getAddr();
439
440 // Ignore instructions before FirstI if this is the FirstBB.
441 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
442
443 BasicBlock::iterator EI;
444 if (isFirstBlock) {
445 // Ignore instructions after SecondI if this is the first visit of SecondBB.
446 assert(B == SecondBB && "first block is not the store block");
447 EI = SecondBBI;
448 isFirstBlock = false;
449 } else {
450 // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
451 // In this case we also have to look at instructions after SecondI.
452 EI = B->end();
453 }
454 for (; BI != EI; ++BI) {
455 Instruction *I = &*BI;
456 if (I->mayWriteToMemory() && I != SecondI)
457 if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
458 return false;
459 }
460 if (B != FirstBB) {
461 assert(B != &FirstBB->getParent()->getEntryBlock() &&
462 "Should not hit the entry block because SI must be dominated by LI");
463 for (BasicBlock *Pred : predecessors(B)) {
464 PHITransAddr PredAddr = Addr;
465 if (PredAddr.needsPHITranslationFromBlock(B)) {
466 if (!PredAddr.isPotentiallyPHITranslatable())
467 return false;
468 if (!PredAddr.translateValue(B, Pred, DT, false))
469 return false;
470 }
471 Value *TranslatedPtr = PredAddr.getAddr();
472 auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
473 if (!Inserted.second) {
474 // We already visited this block before. If it was with a different
475 // address - bail out!
476 if (TranslatedPtr != Inserted.first->second)
477 return false;
478 // ... otherwise just skip it.
479 continue;
480 }
481 WorkList.push_back(std::make_pair(Pred, PredAddr));
482 }
483 }
484 }
485 return true;
486}
487
488static void shortenAssignment(Instruction *Inst, uint64_t OldOffsetInBits,
489 uint64_t OldSizeInBits, uint64_t NewSizeInBits,
490 bool IsOverwriteEnd) {
491 DIExpression::FragmentInfo DeadFragment;
492 DeadFragment.SizeInBits = OldSizeInBits - NewSizeInBits;
493 DeadFragment.OffsetInBits =
494 OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
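  // For example (illustrative numbers): shortening a 128-bit store to 96 bits
  // with IsOverwriteEnd == true leaves a dead fragment of 32 bits starting at
  // bit offset OldOffsetInBits + 96.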
495
496 auto CreateDeadFragExpr = [Inst, DeadFragment]() {
497 // FIXME: This should be using the DIExpression in the Alloca's dbg.assign
498 // for the variable, since that could also contain a fragment?
499 return *DIExpression::createFragmentExpression(
500 DIExpression::get(Inst->getContext(), std::nullopt),
501 DeadFragment.OffsetInBits, DeadFragment.SizeInBits);
502 };
503
504 // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
505 // link to any instructions. Created in the loop below (once).
506 DIAssignID *LinkToNothing = nullptr;
507
508 // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
509 // overlapping dbg.assign intrinsic.
510 for (auto *DAI : at::getAssignmentMarkers(Inst)) {
511 if (auto FragInfo = DAI->getExpression()->getFragmentInfo()) {
512 if (!DIExpression::fragmentsOverlap(*FragInfo, DeadFragment))
513 continue;
514 }
515
516 // Fragments overlap: insert a new dbg.assign for this dead part.
517 auto *NewAssign = cast<DbgAssignIntrinsic>(DAI->clone());
518 NewAssign->insertAfter(DAI);
519 if (!LinkToNothing)
520 LinkToNothing = DIAssignID::getDistinct(Inst->getContext());
521 NewAssign->setAssignId(LinkToNothing);
522 NewAssign->setExpression(CreateDeadFragExpr());
523 NewAssign->setKillAddress();
524 }
525}
526
527static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
528 uint64_t &DeadSize, int64_t KillingStart,
529 uint64_t KillingSize, bool IsOverwriteEnd) {
530 auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
531 Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
532
533 // We assume that memset/memcpy operates in chunks of the "largest" native
534 // type size, aligned on the same value. That means the optimal start and size
535 // of memset/memcpy should be a multiple of the preferred alignment of that
536 // type, i.e. there is no point in trying to reduce the store size any further
537 // since any "extra" stores come for free anyway.
538 // On the other hand, the maximum alignment we can achieve is limited by the
539 // alignment of the initial store.
540
541 // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
542 // "largest" native type.
543 // Note: What is the proper way to get that value?
544 // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
545 // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
546
547 int64_t ToRemoveStart = 0;
548 uint64_t ToRemoveSize = 0;
549 // Compute start and size of the region to remove. Make sure 'PrefAlign' is
550 // maintained on the remaining store.
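  // For example (illustrative numbers, IsOverwriteEnd case): with DeadStart = 0,
  // DeadSize = 32, KillingStart = 10 and PrefAlign = 8, Off below becomes 6, so
  // bytes [16, 32) are removed and the memset keeps the aligned range [0, 16).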
551 if (IsOverwriteEnd) {
552 // Calculate the required adjustment for 'KillingStart' in order to keep
553 // the remaining store size aligned on 'PrefAlign'.
554 uint64_t Off =
555 offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
556 ToRemoveStart = KillingStart + Off;
557 if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
558 return false;
559 ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
560 } else {
561 ToRemoveStart = DeadStart;
562 assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
563 "Not overlapping accesses?");
564 ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
565 // Calculate the required adjustment for 'ToRemoveSize' in order to keep
566 // the start of the remaining store aligned on 'PrefAlign'.
567 uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
568 if (Off != 0) {
569 if (ToRemoveSize <= (PrefAlign.value() - Off))
570 return false;
571 ToRemoveSize -= PrefAlign.value() - Off;
572 }
573 assert(isAligned(PrefAlign, ToRemoveSize) &&
574 "Should preserve selected alignment");
575 }
576
577 assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
578 assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
579
580 uint64_t NewSize = DeadSize - ToRemoveSize;
581 if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
582 // When shortening an atomic memory intrinsic, the newly shortened
583 // length must remain an integer multiple of the element size.
584 const uint32_t ElementSize = AMI->getElementSizeInBytes();
585 if (0 != NewSize % ElementSize)
586 return false;
587 }
588
589 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
590 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
591 << "\n KILLER [" << ToRemoveStart << ", "
592 << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
593
594 Value *DeadWriteLength = DeadIntrinsic->getLength();
595 Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
596 DeadIntrinsic->setLength(TrimmedLength);
597 DeadIntrinsic->setDestAlignment(PrefAlign);
598
599 if (!IsOverwriteEnd) {
600 Value *OrigDest = DeadIntrinsic->getRawDest();
601 Type *Int8PtrTy =
602 Type::getInt8PtrTy(DeadIntrinsic->getContext(),
603 OrigDest->getType()->getPointerAddressSpace());
604 Value *Dest = OrigDest;
605 if (OrigDest->getType() != Int8PtrTy)
606 Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", DeadI);
607 Value *Indices[1] = {
608 ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
609 Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
610 Type::getInt8Ty(DeadIntrinsic->getContext()), Dest, Indices, "", DeadI);
611 NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
612 if (NewDestGEP->getType() != OrigDest->getType())
613 NewDestGEP = CastInst::CreatePointerCast(NewDestGEP, OrigDest->getType(),
614 "", DeadI);
615 DeadIntrinsic->setDest(NewDestGEP);
616 }
617
618 // Update attached dbg.assign intrinsics. Assume 8-bit byte.
619 shortenAssignment(DeadI, DeadStart * 8, DeadSize * 8, NewSize * 8,
620 IsOverwriteEnd);
621
622 // Finally update start and size of dead access.
623 if (!IsOverwriteEnd)
624 DeadStart += ToRemoveSize;
625 DeadSize = NewSize;
626
627 return true;
628}
629
630static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
631 int64_t &DeadStart, uint64_t &DeadSize) {
632 if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
633 return false;
634
635 OverlapIntervalsTy::iterator OII = --IntervalMap.end();
636 int64_t KillingStart = OII->second;
637 uint64_t KillingSize = OII->first - KillingStart;
638
639 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
640
641 if (KillingStart > DeadStart &&
642 // Note: "KillingStart - KillingStart" is known to be positive due to
643 // preceding check.
644 (uint64_t)(KillingStart - DeadStart) < DeadSize &&
645 // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
646 // be non negative due to preceding checks.
647 KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
648 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
649 true)) {
650 IntervalMap.erase(OII);
651 return true;
652 }
653 }
654 return false;
655}
656
657static bool tryToShortenBegin(Instruction *DeadI,
658 OverlapIntervalsTy &IntervalMap,
659 int64_t &DeadStart, uint64_t &DeadSize) {
660 if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
661 return false;
662
663 OverlapIntervalsTy::iterator OII = IntervalMap.begin();
664 int64_t KillingStart = OII->second;
665 uint64_t KillingSize = OII->first - KillingStart;
666
667 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
668
669 if (KillingStart <= DeadStart &&
670 // Note: "DeadStart - KillingStart" is known to be non negative due to
671 // preceding check.
672 KillingSize > (uint64_t)(DeadStart - KillingStart)) {
673 // Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
674 // be positive due to preceding checks.
675 assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
676 "Should have been handled as OW_Complete");
677 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
678 false)) {
679 IntervalMap.erase(OII);
680 return true;
681 }
682 }
683 return false;
684}
685
686static Constant *
687tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
688 int64_t KillingOffset, int64_t DeadOffset,
689 const DataLayout &DL, BatchAAResults &AA,
690 DominatorTree *DT) {
691
692 if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
693 DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
694 KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
695 DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
696 memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
697 // If the store we find is:
698 // a) partially overwritten by the store to 'Loc'
699 // b) the killing store is fully contained in the dead one and
700 // c) they both have a constant value
701 // d) none of the two stores need padding
702 // Merge the two stores, replacing the dead store's value with a
703 // merge of both values.
704 // TODO: Deal with other constant types (vectors, etc), and probably
705 // some mem intrinsics (if needed)
706
707 APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
708 APInt KillingValue =
709 cast<ConstantInt>(KillingI->getValueOperand())->getValue();
710 unsigned KillingBits = KillingValue.getBitWidth();
711 assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
712 KillingValue = KillingValue.zext(DeadValue.getBitWidth());
713
714 // Offset of the smaller store inside the larger store
715 unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
716 unsigned LShiftAmount =
717 DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
718 : BitOffsetDiff;
719 APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
720 LShiftAmount + KillingBits);
721 // Clear the bits we'll be replacing, then OR with the smaller
722 // store, shifted appropriately.
723 APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
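    // Worked example (illustrative, little-endian): a dead i32 store of
    // 0x11223344 merged with a killing i16 store of 0xAABB at byte offset 1
    // gives LShiftAmount = 8, Mask = 0x00FFFF00 and Merged = 0x11AABB44.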
724 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
725 << "\n Killing: " << *KillingI
726 << "\n Merged Value: " << Merged << '\n');
727 return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
728 }
729 return nullptr;
730}
731
732namespace {
734// Returns true if \p I is an intrinsic that does not read or write memory.
734bool isNoopIntrinsic(Instruction *I) {
735 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
736 switch (II->getIntrinsicID()) {
737 case Intrinsic::lifetime_start:
738 case Intrinsic::lifetime_end:
739 case Intrinsic::invariant_end:
740 case Intrinsic::launder_invariant_group:
741 case Intrinsic::assume:
742 return true;
743 case Intrinsic::dbg_addr:
744 case Intrinsic::dbg_declare:
745 case Intrinsic::dbg_label:
746 case Intrinsic::dbg_value:
747 llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
748 default:
749 return false;
750 }
751 }
752 return false;
753}
754
755// Check if we can ignore \p D for DSE.
756bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
757 Instruction *DI = D->getMemoryInst();
758 // Calls that only access inaccessible memory cannot read or write any memory
759 // locations we consider for elimination.
760 if (auto *CB = dyn_cast<CallBase>(DI))
761 if (CB->onlyAccessesInaccessibleMemory())
762 return true;
763
764 // We can eliminate stores to locations not visible to the caller across
765 // throwing instructions.
766 if (DI->mayThrow() && !DefVisibleToCaller)
767 return true;
768
769 // We can remove the dead stores, irrespective of the fence and its ordering
770 // (release/acquire/seq_cst). Fences only constrain the ordering of
771 // already visible stores; they do not make a store visible to other
772 // threads. So, skipping over a fence does not change a store from being
773 // dead.
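  // For example (illustrative IR): in
  //   store i32 1, ptr %local
  //   fence release
  //   store i32 2, ptr %local
  // the first store to the non-escaping %local stays dead despite the fence.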
774 if (isa<FenceInst>(DI))
775 return true;
776
777 // Skip intrinsics that do not really read or modify memory.
778 if (isNoopIntrinsic(DI))
779 return true;
780
781 return false;
782}
783
784struct DSEState {
785 Function &F;
786 AliasAnalysis &AA;
787 EarliestEscapeInfo EI;
788
789 /// The single BatchAA instance that is used to cache AA queries. It will
790 /// not be invalidated over the whole run. This is safe, because:
791 /// 1. Only memory writes are removed, so the alias cache for memory
792 /// locations remains valid.
793 /// 2. No new instructions are added (only instructions removed), so cached
794 /// information for a deleted value cannot be accessed by a re-used new
795 /// value pointer.
796 BatchAAResults BatchAA;
797
798 MemorySSA &MSSA;
799 DominatorTree &DT;
800 PostDominatorTree &PDT;
801 const TargetLibraryInfo &TLI;
802 const DataLayout &DL;
803 const LoopInfo &LI;
804
805 // Whether the function contains any irreducible control flow, useful for
806 // being able to accurately detect loops.
807 bool ContainsIrreducibleLoops;
808
809 // All MemoryDefs that potentially could kill other MemDefs.
810 SmallVector<MemoryDef *, 64> MemDefs;
811 // Any that should be skipped as they are already deleted
812 SmallPtrSet<MemoryAccess *, 4> SkipStores;
813 // Keep track whether a given object is captured before return or not.
814 DenseMap<const Value *, bool> CapturedBeforeReturn;
815 // Keep track of all of the objects that are invisible to the caller after
816 // the function returns.
817 DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
818 // Keep track of blocks with throwing instructions not modeled in MemorySSA.
819 SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
820 // Post-order numbers for each basic block. Used to figure out if memory
821 // accesses are executed before another access.
822 DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
823 // Values that are only used with assumes. Used to refine pointer escape
824 // analysis.
825 SmallSetVector<const Value *, 16> EphValues;
826
827 /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
828 /// basic block.
829 MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
830 // Check if there are root nodes that are terminated by UnreachableInst.
831 // Those roots pessimize post-dominance queries. If there are such roots,
832 // fall back to CFG scan starting from all non-unreachable roots.
833 bool AnyUnreachableExit;
834
835 // Whether or not we should iterate on removing dead stores at the end of the
836 // function due to removing a store causing a previously captured pointer to
837 // no longer be captured.
838 bool ShouldIterateEndOfFunctionDSE;
839
840 // Class contains self-reference, make sure it's not copied/moved.
841 DSEState(const DSEState &) = delete;
842 DSEState &operator=(const DSEState &) = delete;
843
844 DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
845 PostDominatorTree &PDT, AssumptionCache &AC,
846 const TargetLibraryInfo &TLI, const LoopInfo &LI)
847 : F(F), AA(AA), EI(DT, LI, EphValues), BatchAA(AA, &EI), MSSA(MSSA),
848 DT(DT), PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
849 // Collect blocks with throwing instructions not modeled in MemorySSA and
850 // alloc-like objects.
851 unsigned PO = 0;
852 for (BasicBlock *BB : post_order(&F)) {
853 PostOrderNumbers[BB] = PO++;
854 for (Instruction &I : *BB) {
855 MemoryAccess *MA = MSSA.getMemoryAccess(&I);
856 if (I.mayThrow() && !MA)
857 ThrowingBlocks.insert(I.getParent());
858
859 auto *MD = dyn_cast_or_null<MemoryDef>(MA);
860 if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
861 (getLocForWrite(&I) || isMemTerminatorInst(&I)))
862 MemDefs.push_back(MD);
863 }
864 }
865
866 // Treat byval or inalloca arguments the same as Allocas: stores to them are
867 // dead at the end of the function.
868 for (Argument &AI : F.args())
869 if (AI.hasPassPointeeByValueCopyAttr())
870 InvisibleToCallerAfterRet.insert({&AI, true});
871
872 // Collect whether there is any irreducible control flow in the function.
873 ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
874
875 AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
876 return isa<UnreachableInst>(E->getTerminator());
877 });
878
879 CodeMetrics::collectEphemeralValues(&F, &AC, EphValues);
880 }
881
882 LocationSize strengthenLocationSize(const Instruction *I,
883 LocationSize Size) const {
884 if (auto *CB = dyn_cast<CallBase>(I)) {
885 LibFunc F;
886 if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
887 (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
888 // Use the precise location size specified by the 3rd argument
889 // for determining whether KillingI overwrites DeadLoc, if it is a
890 // memset_chk call. memset_chk will either write the amount specified as
891 // its 3rd argument or immediately abort and exit the program.
892 // NOTE: AA may determine NoAlias if it can prove that the access size
893 // is larger than the allocation size due to that being UB. To avoid
894 // returning potentially invalid NoAlias results by AA, limit the use of
895 // the precise location size to isOverwrite.
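        // For example (illustrative): for __memset_chk(p, 0, 16, n) the
        // location size used here becomes LocationSize::precise(16).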
896 if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
897 return LocationSize::precise(Len->getZExtValue());
898 }
899 }
900 return Size;
901 }
902
903 /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
904 /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
905 /// location (by \p DeadI instruction).
906 /// Return OW_MaybePartial if \p KillingI does not completely overwrite
907 /// \p DeadI, but they both write to the same underlying object. In that
908 /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
909 /// \p DeadI. Returns 'OR_None' if \p KillingI is known to not overwrite the
910 /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
911 OverwriteResult isOverwrite(const Instruction *KillingI,
912 const Instruction *DeadI,
913 const MemoryLocation &KillingLoc,
914 const MemoryLocation &DeadLoc,
915 int64_t &KillingOff, int64_t &DeadOff) {
916 // AliasAnalysis does not always account for loops. Limit overwrite checks
917 // to dependencies for which we can guarantee they are independent of any
918 // loops they are in.
919 if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
920 return OW_Unknown;
921
922 LocationSize KillingLocSize =
923 strengthenLocationSize(KillingI, KillingLoc.Size);
924 const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
925 const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
926 const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
927 const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
928
929 // Check whether the killing store overwrites the whole object, in which
930 // case the size/offset of the dead store does not matter.
931 if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise()) {
932 uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
933 if (KillingUndObjSize != MemoryLocation::UnknownSize &&
934 KillingUndObjSize == KillingLocSize.getValue())
935 return OW_Complete;
936 }
937
938 // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
939 // get imprecise values here, though (except for unknown sizes).
940 if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
941 // In case no constant size is known, try to use the IR values for the
942 // number of bytes written and check if they match.
943 const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
944 const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
945 if (KillingMemI && DeadMemI) {
946 const Value *KillingV = KillingMemI->getLength();
947 const Value *DeadV = DeadMemI->getLength();
948 if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
949 return OW_Complete;
950 }
951
952 // Masked stores have imprecise locations, but we can reason about them
953 // to some extent.
954 return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
955 }
956
957 const uint64_t KillingSize = KillingLocSize.getValue();
958 const uint64_t DeadSize = DeadLoc.Size.getValue();
959
960 // Query the alias information
961 AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
962
963 // If the start pointers are the same, we just have to compare sizes to see if
964 // the killing store was larger than the dead store.
965 if (AAR == AliasResult::MustAlias) {
966 // Make sure that the KillingSize size is >= the DeadSize size.
967 if (KillingSize >= DeadSize)
968 return OW_Complete;
969 }
970
971 // If we hit a partial alias we may have a full overwrite
972 if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
973 int32_t Off = AAR.getOffset();
974 if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
975 return OW_Complete;
976 }
977
978 // If we can't resolve the same pointers to the same object, then we can't
979 // analyze them at all.
980 if (DeadUndObj != KillingUndObj) {
981 // Non-aliasing stores to different objects don't overlap. Note that
982 // if the killing store is known to overwrite the whole object (an out of
983 // bounds access overwrites the whole object as well) then it is assumed to
984 // completely overwrite any store to the same object even if they don't
985 // actually alias (see next check).
986 if (AAR == AliasResult::NoAlias)
987 return OW_None;
988 return OW_Unknown;
989 }
990
991 // Okay, the stores are through two different pointers into the same object.
992 // Try to decompose each pointer into a "base + constant_offset" form. If the
993 // base pointers are equal, then we can reason about the two stores.
994 DeadOff = 0;
995 KillingOff = 0;
996 const Value *DeadBasePtr =
997 GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
998 const Value *KillingBasePtr =
999 GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
1000
1001 // If the base pointers still differ, we have two completely different
1002 // stores.
1003 if (DeadBasePtr != KillingBasePtr)
1004 return OW_Unknown;
1005
1006 // The killing access completely overlaps the dead store if and only if
1007 // both start and end of the dead one is "inside" the killing one:
1008 // |<->|--dead--|<->|
1009 // |-----killing------|
1010 // Accesses may overlap if and only if start of one of them is "inside"
1011 // another one:
1012 // |<->|--dead--|<-------->|
1013 // |-------killing--------|
1014 // OR
1015 // |-------dead-------|
1016 // |<->|---killing---|<----->|
1017 //
1018 // We have to be careful here as *Off is signed while *.Size is unsigned.
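    // For example (illustrative values): DeadOff = 4, DeadSize = 4,
    // KillingOff = 0, KillingSize = 8 gives DeadOff - KillingOff = 4 and
    // 4 + DeadSize <= KillingSize, so the dead store is completely
    // overwritten (OW_Complete).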
1019
1020 // Check if the dead access starts "not before" the killing one.
1021 if (DeadOff >= KillingOff) {
1022 // If the dead access ends "not after" the killing access then the
1023 // dead one is completely overwritten by the killing one.
1024 if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
1025 return OW_Complete;
1026 // If start of the dead access is "before" end of the killing access
1027 // then accesses overlap.
1028 else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
1029 return OW_MaybePartial;
1030 }
1031 // If start of the killing access is "before" end of the dead access then
1032 // accesses overlap.
1033 else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
1034 return OW_MaybePartial;
1035 }
1036
1037 // Can reach here only if accesses are known not to overlap.
1038 return OW_None;
1039 }
1040
1041 bool isInvisibleToCallerAfterRet(const Value *V) {
1042 if (isa<AllocaInst>(V))
1043 return true;
1044 auto I = InvisibleToCallerAfterRet.insert({V, false});
1045 if (I.second) {
1046 if (!isInvisibleToCallerOnUnwind(V)) {
1047 I.first->second = false;
1048 } else if (isNoAliasCall(V)) {
1049 I.first->second = !PointerMayBeCaptured(V, true, false, EphValues);
1050 }
1051 }
1052 return I.first->second;
1053 }
1054
1055 bool isInvisibleToCallerOnUnwind(const Value *V) {
1056 bool RequiresNoCaptureBeforeUnwind;
1057 if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
1058 return false;
1059 if (!RequiresNoCaptureBeforeUnwind)
1060 return true;
1061
1062 auto I = CapturedBeforeReturn.insert({V, true});
1063 if (I.second)
1064 // NOTE: This could be made more precise by PointerMayBeCapturedBefore
1065 // with the killing MemoryDef. But we refrain from doing so for now to
1066 // limit compile-time and this does not cause any changes to the number
1067 // of stores removed on a large test set in practice.
1068 I.first->second = PointerMayBeCaptured(V, false, true, EphValues);
1069 return !I.first->second;
1070 }
1071
1072 std::optional<MemoryLocation> getLocForWrite(Instruction *I) const {
1073 if (!I->mayWriteToMemory())
1074 return std::nullopt;
1075
1076 if (auto *CB = dyn_cast<CallBase>(I))
1077 return MemoryLocation::getForDest(CB, TLI);
1078
1079 return MemoryLocation::getOrNone(I);
1080 }
1081
1082 /// Assuming this instruction has a dead analyzable write, can we delete
1083 /// this instruction?
1084 bool isRemovable(Instruction *I) {
1085 assert(getLocForWrite(I) && "Must have analyzable write");
1086
1087 // Don't remove volatile/atomic stores.
1088 if (StoreInst *SI = dyn_cast<StoreInst>(I))
1089 return SI->isUnordered();
1090
1091 if (auto *CB = dyn_cast<CallBase>(I)) {
1092 // Don't remove volatile memory intrinsics.
1093 if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1094 return !MI->isVolatile();
1095
1096 // Never remove dead lifetime intrinsics, e.g. because they are followed
1097 // by a free.
1098 if (CB->isLifetimeStartOrEnd())
1099 return false;
1100
1101 return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1102 !CB->isTerminator();
1103 }
1104
1105 return false;
1106 }
1107
1108 /// Returns true if \p UseInst completely overwrites \p DefLoc
1109 /// (stored by \p DefInst).
1110 bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1111 Instruction *UseInst) {
1112 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1113 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1114 // MemoryDef.
1115 if (!UseInst->mayWriteToMemory())
1116 return false;
1117
1118 if (auto *CB = dyn_cast<CallBase>(UseInst))
1119 if (CB->onlyAccessesInaccessibleMemory())
1120 return false;
1121
1122 int64_t InstWriteOffset, DepWriteOffset;
1123 if (auto CC = getLocForWrite(UseInst))
1124 return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1125 DepWriteOffset) == OW_Complete;
1126 return false;
1127 }
1128
1129 /// Returns true if \p Def is not read before returning from the function.
1130 bool isWriteAtEndOfFunction(MemoryDef *Def) {
1131 LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1132 << *Def->getMemoryInst()
1133 << ") is at the end the function \n");
1134
1135 auto MaybeLoc = getLocForWrite(Def->getMemoryInst());
1136 if (!MaybeLoc) {
1137 LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
1138 return false;
1139 }
1140
1141 SmallVector<MemoryAccess *, 4> WorkList;
1142 SmallPtrSet<MemoryAccess *, 8> Visited;
1143 auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
1144 if (!Visited.insert(Acc).second)
1145 return;
1146 for (Use &U : Acc->uses())
1147 WorkList.push_back(cast<MemoryAccess>(U.getUser()));
1148 };
1149 PushMemUses(Def);
1150 for (unsigned I = 0; I < WorkList.size(); I++) {
1151 if (WorkList.size() >= MemorySSAScanLimit) {
1152 LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1153 return false;
1154 }
1155
1156 MemoryAccess *UseAccess = WorkList[I];
1157 if (isa<MemoryPhi>(UseAccess)) {
1158 // AliasAnalysis does not account for loops. Limit elimination to
1159 // candidates for which we can guarantee they always store to the same
1160 // memory location.
1161 if (!isGuaranteedLoopInvariant(MaybeLoc->Ptr))
1162 return false;
1163
1164 PushMemUses(cast<MemoryPhi>(UseAccess));
1165 continue;
1166 }
1167 // TODO: Checking for aliasing is expensive. Consider reducing the amount
1168 // of times this is called and/or caching it.
1169 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1170 if (isReadClobber(*MaybeLoc, UseInst)) {
1171 LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1172 return false;
1173 }
1174
1175 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1176 PushMemUses(UseDef);
1177 }
1178 return true;
1179 }
1180
1181 /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1182 /// pair with the MemoryLocation terminated by \p I and a boolean flag
1183 /// indicating whether \p I is a free-like call.
1184 std::optional<std::pair<MemoryLocation, bool>>
1185 getLocForTerminator(Instruction *I) const {
1186 uint64_t Len;
1187 Value *Ptr;
1188 if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
1189 m_Value(Ptr))))
1190 return {std::make_pair(MemoryLocation(Ptr, Len), false)};
1191
1192 if (auto *CB = dyn_cast<CallBase>(I)) {
1193 if (Value *FreedOp = getFreedOperand(CB, &TLI))
1194 return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
1195 }
1196
1197 return std::nullopt;
1198 }
1199
1200 /// Returns true if \p I is a memory terminator instruction like
1201 /// llvm.lifetime.end or free.
1202 bool isMemTerminatorInst(Instruction *I) const {
1203 auto *CB = dyn_cast<CallBase>(I);
1204 return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1205 getFreedOperand(CB, &TLI) != nullptr);
1206 }
1207
1208 /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1209 /// instruction \p AccessI.
1210 bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1211 Instruction *MaybeTerm) {
1212 std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1213 getLocForTerminator(MaybeTerm);
1214
1215 if (!MaybeTermLoc)
1216 return false;
1217
1218 // If the terminator is a free-like call, all accesses to the underlying
1219 // object can be considered terminated.
1220 if (getUnderlyingObject(Loc.Ptr) !=
1221 getUnderlyingObject(MaybeTermLoc->first.Ptr))
1222 return false;
1223
1224 auto TermLoc = MaybeTermLoc->first;
1225 if (MaybeTermLoc->second) {
1226 const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1227 return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1228 }
1229 int64_t InstWriteOffset = 0;
1230 int64_t DepWriteOffset = 0;
1231 return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1232 DepWriteOffset) == OW_Complete;
1233 }
1234
1235 // Returns true if \p Use may read from \p DefLoc.
1236 bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1237 if (isNoopIntrinsic(UseInst))
1238 return false;
1239
1240 // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1241 // treated as read clobber.
1242 if (auto SI = dyn_cast<StoreInst>(UseInst))
1243 return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1244
1245 if (!UseInst->mayReadFromMemory())
1246 return false;
1247
1248 if (auto *CB = dyn_cast<CallBase>(UseInst))
1249 if (CB->onlyAccessesInaccessibleMemory())
1250 return false;
1251
1252 return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1253 }
1254
1255 /// Returns true if a dependency between \p Current and \p KillingDef is
1256 /// guaranteed to be loop invariant for the loops that they are in. Either
1257 /// because they are known to be in the same block, in the same loop level or
1258 /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1259 /// during execution of the containing function.
1260 bool isGuaranteedLoopIndependent(const Instruction *Current,
1261 const Instruction *KillingDef,
1262 const MemoryLocation &CurrentLoc) {
1263 // If the dependency is within the same block or loop level (being careful
1264 // of irreducible loops), we know that AA will return a valid result for the
1265 // memory dependency. (Both at the function level, outside of any loop,
1266 // would also be valid but we currently disable that to limit compile time).
1267 if (Current->getParent() == KillingDef->getParent())
1268 return true;
1269 const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
1270 if (!ContainsIrreducibleLoops && CurrentLI &&
1271 CurrentLI == LI.getLoopFor(KillingDef->getParent()))
1272 return true;
1273 // Otherwise check the memory location is invariant to any loops.
1274 return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1275 }
1276
1277 /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1278 /// loop. In particular, this guarantees that it only references a single
1279 /// MemoryLocation during execution of the containing function.
1280 bool isGuaranteedLoopInvariant(const Value *Ptr) {
1281 Ptr = Ptr->stripPointerCasts();
1282 if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1283 if (GEP->hasAllConstantIndices())
1284 Ptr = GEP->getPointerOperand()->stripPointerCasts();
1285
1286 if (auto *I = dyn_cast<Instruction>(Ptr)) {
1287 return I->getParent()->isEntryBlock() ||
1288 (!ContainsIrreducibleLoops && !LI.getLoopFor(I->getParent()));
1289 }
1290 return true;
1291 }
1292
1293 // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1294 // with no read access between them or on any other path to a function exit
1295 // block if \p KillingLoc is not accessible after the function returns. If
1296 // there is no such MemoryDef, return std::nullopt. The returned value may not
1297 // (completely) overwrite \p KillingLoc. Currently we bail out when we
1298 // encounter an aliasing MemoryUse (read).
1299 std::optional<MemoryAccess *>
1300 getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1301 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1302 unsigned &ScanLimit, unsigned &WalkerStepLimit,
1303 bool IsMemTerm, unsigned &PartialLimit) {
1304 if (ScanLimit == 0 || WalkerStepLimit == 0) {
1305 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1306 return std::nullopt;
1307 }
1308
1309 MemoryAccess *Current = StartAccess;
1310 Instruction *KillingI = KillingDef->getMemoryInst();
1311 LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1312
1313 // Only optimize defining access of KillingDef when directly starting at its
1314 // defining access. The defining access also must only access KillingLoc. At
1315 // the moment we only support instructions with a single write location, so
1316 // it should be sufficient to disable optimizations for instructions that
1317 // also read from memory.
1318 bool CanOptimize = OptimizeMemorySSA &&
1319 KillingDef->getDefiningAccess() == StartAccess &&
1320 !KillingI->mayReadFromMemory();
1321
1322 // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1323 std::optional<MemoryLocation> CurrentLoc;
1324 for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1325 LLVM_DEBUG({
1326 dbgs() << " visiting " << *Current;
1327 if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1328 dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1329 << ")";
1330 dbgs() << "\n";
1331 });
1332
1333 // Reached TOP.
1334 if (MSSA.isLiveOnEntryDef(Current)) {
1335 LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1336 if (CanOptimize && Current != KillingDef->getDefiningAccess())
1337 // The first clobbering def is... none.
1338 KillingDef->setOptimized(Current);
1339 return std::nullopt;
1340 }
1341
1342 // Cost of a step. Accesses in the same block are more likely to be valid
1343 // candidates for elimination, hence consider them cheaper.
1344 unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1345 ? MemorySSASameBBStepCost
1346 : MemorySSAOtherBBStepCost;
1347 if (WalkerStepLimit <= StepCost) {
1348 LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1349 return std::nullopt;
1350 }
1351 WalkerStepLimit -= StepCost;
1352
1353 // Return for MemoryPhis. They cannot be eliminated directly and the
1354 // caller is responsible for traversing them.
1355 if (isa<MemoryPhi>(Current)) {
1356 LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1357 return Current;
1358 }
1359
1360 // Below, check if CurrentDef is a valid candidate to be eliminated by
1361 // KillingDef. If it is not, check the next candidate.
1362 MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1363 Instruction *CurrentI = CurrentDef->getMemoryInst();
1364
1365 if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
1366 CanOptimize = false;
1367 continue;
1368 }
1369
1370 // Before we try to remove anything, check for any extra throwing
1371 // instructions that block us from DSEing
1372 if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1373 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1374 return std::nullopt;
1375 }
1376
1377 // Check for anything that looks like it will be a barrier to further
1378 // removal
1379 if (isDSEBarrier(KillingUndObj, CurrentI)) {
1380 LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1381 return std::nullopt;
1382 }
1383
1384 // If Current is known to be on a path that reads DefLoc or is a read
1385 // clobber, bail out, as the path is not profitable. We skip this check
1386 // for intrinsic calls, because the code knows how to handle memcpy
1387 // intrinsics.
1388 if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1389 return std::nullopt;
1390
1391 // Quick check if there are direct uses that are read-clobbers.
1392 if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1393 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1394 return !MSSA.dominates(StartAccess, UseOrDef) &&
1395 isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1396 return false;
1397 })) {
1398 LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1399 return std::nullopt;
1400 }
1401
1402 // If Current does not have an analyzable write location or is not
1403 // removable, skip it.
1404 CurrentLoc = getLocForWrite(CurrentI);
1405 if (!CurrentLoc || !isRemovable(CurrentI)) {
1406 CanOptimize = false;
1407 continue;
1408 }
1409
1410 // AliasAnalysis does not account for loops. Limit elimination to
1411 // candidates for which we can guarantee they always store to the same
1412 // memory location and not located in different loops.
1413 if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1414 LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1415 CanOptimize = false;
1416 continue;
1417 }
1418
1419 if (IsMemTerm) {
1420 // If the killing def is a memory terminator (e.g. lifetime.end), check
1421 // the next candidate if Current does not write to the same underlying
1422 // object as the terminator.
1423 if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1424 CanOptimize = false;
1425 continue;
1426 }
1427 } else {
1428 int64_t KillingOffset = 0;
1429 int64_t DeadOffset = 0;
1430 auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1431 KillingOffset, DeadOffset);
1432 if (CanOptimize) {
1433 // CurrentDef is the earliest write clobber of KillingDef. Use it as
1434 // optimized access. Do not optimize if CurrentDef is already the
1435 // defining access of KillingDef.
1436 if (CurrentDef != KillingDef->getDefiningAccess() &&
1437 (OR == OW_Complete || OR == OW_MaybePartial))
1438 KillingDef->setOptimized(CurrentDef);
1439
1440 // Once a may-aliasing def is encountered do not set an optimized
1441 // access.
1442 if (OR != OW_None)
1443 CanOptimize = false;
1444 }
1445
1446 // If Current does not write to the same object as KillingDef, check
1447 // the next candidate.
1448 if (OR == OW_Unknown || OR == OW_None)
1449 continue;
1450 else if (OR == OW_MaybePartial) {
1451 // If KillingDef only partially overwrites Current, check the next
1452 // candidate if the partial step limit is exceeded. This aggressively
1453 // limits the number of candidates for partial store elimination,
1454 // which are less likely to be removable in the end.
1455 if (PartialLimit <= 1) {
1456 WalkerStepLimit -= 1;
1457 LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
1458 continue;
1459 }
1460 PartialLimit -= 1;
1461 }
1462 }
1463 break;
1464 };
1465
1466 // Accesses to objects accessible after the function returns can only be
1467 // eliminated if the access is dead along all paths to the exit. Collect
1468 // the blocks with killing (i.e. completely overwriting) MemoryDefs and check
1469 // if they cover all paths from MaybeDeadAccess to any function exit.
1470 SmallPtrSet<Instruction *, 16> KillingDefs;
1471 KillingDefs.insert(KillingDef->getMemoryInst());
1472 MemoryAccess *MaybeDeadAccess = Current;
1473 MemoryLocation MaybeDeadLoc = *CurrentLoc;
1474 Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1475 LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1476 << *MaybeDeadI << ")\n");
1477
1478 SmallSetVector<MemoryAccess *, 32> WorkList;
1479 auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
1480 for (Use &U : Acc->uses())
1481 WorkList.insert(cast<MemoryAccess>(U.getUser()));
1482 };
1483 PushMemUses(MaybeDeadAccess);
1484
1485 // Check if DeadDef may be read.
1486 for (unsigned I = 0; I < WorkList.size(); I++) {
1487 MemoryAccess *UseAccess = WorkList[I];
1488
1489 LLVM_DEBUG(dbgs() << " " << *UseAccess);
1490 // Bail out if the number of accesses to check exceeds the scan limit.
1491 if (ScanLimit < (WorkList.size() - I)) {
1492 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1493 return std::nullopt;
1494 }
1495 --ScanLimit;
1496 NumDomMemDefChecks++;
1497
1498 if (isa<MemoryPhi>(UseAccess)) {
1499 if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1500 return DT.properlyDominates(KI->getParent(),
1501 UseAccess->getBlock());
1502 })) {
1503 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1504 continue;
1505 }
1506 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1507 PushMemUses(UseAccess);
1508 continue;
1509 }
1510
1511 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1512 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1513
1514 if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1515 return DT.dominates(KI, UseInst);
1516 })) {
1517 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1518 continue;
1519 }
1520
1521 // A memory terminator kills all preceding MemoryDefs and all succeeding
1522 // MemoryAccesses. We do not have to check its users.
1523 if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1524 LLVM_DEBUG(
1525 dbgs()
1526 << " ... skipping, memterminator invalidates following accesses\n");
1527 continue;
1528 }
1529
1530 if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1531 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1532 PushMemUses(UseAccess);
1533 continue;
1534 }
1535
1536 if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
1537 LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1538 return std::nullopt;
1539 }
1540
1541 // Uses which may read the original MemoryDef mean we cannot eliminate the
1542 // original MD. Stop walk.
1543 if (isReadClobber(MaybeDeadLoc, UseInst)) {
1544 LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1545 return std::nullopt;
1546 }
1547
1548 // If this worklist walks back to the original memory access (and the
1549 // pointer is not guaranteed loop invariant) then we cannot assume that a
1550 // store kills itself.
1551 if (MaybeDeadAccess == UseAccess &&
1552 !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1553 LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1554 return std::nullopt;
1555 }
1556 // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1557 // if it reads the memory location.
1558 // TODO: It would probably be better to check for self-reads before
1559 // calling the function.
1560 if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1561 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1562 continue;
1563 }
1564
1565 // Check all uses for MemoryDefs, except for defs completely overwriting
1566 // the original location; for those we can stop. We have to check uses of
1567 // *all* other MemoryDefs we discover, including non-aliasing ones;
1568 // otherwise we might miss cases like the following
1569 // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1570 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1571 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1572 // (The Use points to the *first* Def it may alias)
1573 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1574 // stores [0,1]
1575 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1576 if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1577 BasicBlock *MaybeKillingBlock = UseInst->getParent();
1578 if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1579 PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1580 if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1582 << " ... found killing def " << *UseInst << "\n");
1583 KillingDefs.insert(UseInst);
1584 }
1585 } else {
1587 << " ... found preceeding def " << *UseInst << "\n");
1588 return std::nullopt;
1589 }
1590 } else
1591 PushMemUses(UseDef);
1592 }
1593 }
1594
1595 // For accesses to locations visible after the function returns, make sure
1596 // that the location is dead (=overwritten) along all paths from
1597 // MaybeDeadAccess to the exit.
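// Illustrative example (LLVM IR sketch, assuming a caller-visible global @G):
//   store i32 0, ptr @G          ; MaybeDeadAccess
//   br i1 %c, label %then, label %exit
// then:
//   store i32 1, ptr @G          ; killing block
//   br label %exit
// exit:
//   ret void
// The killing block does not cover the path taken when %c is false, so the
// first store remains observable by the caller and must be kept.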
1598 if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1599 SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1600 for (Instruction *KD : KillingDefs)
1601 KillingBlocks.insert(KD->getParent());
1602 assert(!KillingBlocks.empty() &&
1603 "Expected at least a single killing block");
1604
1605 // Find the common post-dominator of all killing blocks.
1606 BasicBlock *CommonPred = *KillingBlocks.begin();
1607 for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1608 if (!CommonPred)
1609 break;
1610 CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1611 }
1612
1613 // If the common post-dominator does not post-dominate MaybeDeadAccess,
1614 // there is a path from MaybeDeadAccess to an exit not going through a
1615 // killing block.
1616 if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1617 if (!AnyUnreachableExit)
1618 return std::nullopt;
1619
1620 // Fall back to CFG scan starting at all non-unreachable roots if not
1621 // all paths to the exit go through CommonPred.
1622 CommonPred = nullptr;
1623 }
1624
1625 // If CommonPred itself is in the set of killing blocks, we're done.
1626 if (KillingBlocks.count(CommonPred))
1627 return {MaybeDeadAccess};
1628
1629 SetVector<BasicBlock *> WorkList;
1630 // If CommonPred is null, there are multiple exits from the function.
1631 // They all have to be added to the worklist.
1632 if (CommonPred)
1633 WorkList.insert(CommonPred);
1634 else
1635 for (BasicBlock *R : PDT.roots()) {
1636 if (!isa<UnreachableInst>(R->getTerminator()))
1637 WorkList.insert(R);
1638 }
1639
1640 NumCFGTries++;
1641 // Check if all paths starting from an exit node go through one of the
1642 // killing blocks before reaching MaybeDeadAccess.
1643 for (unsigned I = 0; I < WorkList.size(); I++) {
1644 NumCFGChecks++;
1645 BasicBlock *Current = WorkList[I];
1646 if (KillingBlocks.count(Current))
1647 continue;
1648 if (Current == MaybeDeadAccess->getBlock())
1649 return std::nullopt;
1650
1651 // MaybeDeadAccess is reachable from the entry, so we don't have to
1652 // explore unreachable blocks further.
1653 if (!DT.isReachableFromEntry(Current))
1654 continue;
1655
1656 for (BasicBlock *Pred : predecessors(Current))
1657 WorkList.insert(Pred);
1658
1659 if (WorkList.size() >= MemorySSAPathCheckLimit)
1660 return std::nullopt;
1661 }
1662 NumCFGSuccess++;
1663 }
1664
1665 // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1666 // potentially dead.
1667 return {MaybeDeadAccess};
1668 }
1669
1670 // Delete dead memory defs
1671 void deleteDeadInstruction(Instruction *SI) {
1672 MemorySSAUpdater Updater(&MSSA);
1673 SmallVector<Instruction *, 32> NowDeadInsts;
1674 NowDeadInsts.push_back(SI);
1675 --NumFastOther;
1676
1677 while (!NowDeadInsts.empty()) {
1678 Instruction *DeadInst = NowDeadInsts.pop_back_val();
1679 ++NumFastOther;
1680
1681 // Try to preserve debug information attached to the dead instruction.
1682 salvageDebugInfo(*DeadInst);
1683 salvageKnowledge(DeadInst);
1684
1685 // Remove the Instruction from MSSA.
1686 if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
1687 if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
1688 SkipStores.insert(MD);
1689 if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
1690 if (SI->getValueOperand()->getType()->isPointerTy()) {
1691 const Value *UO = getUnderlyingObject(SI->getValueOperand());
1692 if (CapturedBeforeReturn.erase(UO))
1693 ShouldIterateEndOfFunctionDSE = true;
1694 InvisibleToCallerAfterRet.erase(UO);
1695 }
1696 }
1697 }
1698
1699 Updater.removeMemoryAccess(MA);
1700 }
1701
1702 auto I = IOLs.find(DeadInst->getParent());
1703 if (I != IOLs.end())
1704 I->second.erase(DeadInst);
1705 // Remove its operands
1706 for (Use &O : DeadInst->operands())
1707 if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1708 O = nullptr;
1709 if (isInstructionTriviallyDead(OpI, &TLI))
1710 NowDeadInsts.push_back(OpI);
1711 }
1712
1713 EI.removeInstruction(DeadInst);
1714 DeadInst->eraseFromParent();
1715 }
1716 }
1717
1718 // Check for any extra throws between \p KillingI and \p DeadI that block
1719 // DSE. This only checks extra maythrows (those that aren't MemoryDefs).
1720 // MemoryDefs that may throw are handled during the walk from one def to the
1721 // next.
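// Illustrative example (LLVM IR sketch, with a caller-visible global @G):
//   store i32 0, ptr @G     ; cannot be removed, because ...
//   call void @may_throw()  ; ... unwinding here lets the caller observe @G
//   store i32 1, ptr @G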
1722 bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1723 const Value *KillingUndObj) {
1724 // First see if we can ignore it by using the fact that the killing store's
1725 // underlying object is an alloca-like object that is not visible to the
1726 // caller during execution of the function.
1727 if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
1728 return false;
1729
1730 if (KillingI->getParent() == DeadI->getParent())
1731 return ThrowingBlocks.count(KillingI->getParent());
1732 return !ThrowingBlocks.empty();
1733 }
1734
1735 // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1736 // instructions act as barriers:
1737 // * A memory instruction that may throw and \p KillingI accesses a non-stack
1738 // object.
1739 // * Atomic stores stronger than monotonic.
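// Illustrative example (LLVM IR sketch): walking upwards from the killing
// store, an ordered atomic store in between stops the elimination:
//   store i32 0, ptr %p                          ; candidate dead store
//   store atomic i32 1, ptr %q release, align 4  ; barrier (stronger than monotonic)
//   store i32 42, ptr %p                         ; killing store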
1740 bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
1741 // If DeadI may throw it acts as a barrier, unless the killing object is an
1742 // alloca-like object that does not escape.
1743 if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
1744 return true;
1745
1746 // If DeadI is an atomic load/store stronger than monotonic, do not try to
1747 // eliminate/reorder it.
1748 if (DeadI->isAtomic()) {
1749 if (auto *LI = dyn_cast<LoadInst>(DeadI))
1750 return isStrongerThanMonotonic(LI->getOrdering());
1751 if (auto *SI = dyn_cast<StoreInst>(DeadI))
1752 return isStrongerThanMonotonic(SI->getOrdering());
1753 if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
1754 return isStrongerThanMonotonic(ARMW->getOrdering());
1755 if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
1756 return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
1757 isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
1758 llvm_unreachable("other instructions should be skipped in MemorySSA");
1759 }
1760 return false;
1761 }
1762
1763 /// Eliminate writes to objects that are not visible in the caller and are not
1764 /// accessed before returning from the function.
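/// Illustrative example (LLVM IR sketch):
///   %a = alloca i32
///   store i32 42, ptr %a   ; %a does not escape and is never read again
///   ret void               ; before returning, so the store is dead.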
1765 bool eliminateDeadWritesAtEndOfFunction() {
1766 bool MadeChange = false;
1767 LLVM_DEBUG(
1768 dbgs()
1769 << "Trying to eliminate MemoryDefs at the end of the function\n");
1770 do {
1771 ShouldIterateEndOfFunctionDSE = false;
1772 for (MemoryDef *Def : llvm::reverse(MemDefs)) {
1773 if (SkipStores.contains(Def))
1774 continue;
1775
1776 Instruction *DefI = Def->getMemoryInst();
1777 auto DefLoc = getLocForWrite(DefI);
1778 if (!DefLoc || !isRemovable(DefI))
1779 continue;
1780
1781 // NOTE: Currently eliminating writes at the end of a function is
1782 // limited to MemoryDefs with a single underlying object, to save
1783 // compile-time. In practice it appears the case with multiple
1784 // underlying objects is very uncommon. If it turns out to be important,
1785 // we can use getUnderlyingObjects here instead.
1786 const Value *UO = getUnderlyingObject(DefLoc->Ptr);
1787 if (!isInvisibleToCallerAfterRet(UO))
1788 continue;
1789
1790 if (isWriteAtEndOfFunction(Def)) {
1791 // See through pointer-to-pointer bitcasts
1792 LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
1793 "of the function\n");
1795 ++NumFastStores;
1796 MadeChange = true;
1797 }
1798 }
1799 } while (ShouldIterateEndOfFunctionDSE);
1800 return MadeChange;
1801 }
1802
1803 /// If we have a zero-initializing memset following a call to malloc,
1804 /// try folding it into a call to calloc.
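/// Illustrative example (LLVM IR sketch) of the rewrite this enables:
///   %p = call ptr @malloc(i64 %n)
///   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
/// becomes
///   %p = call ptr @calloc(i64 1, i64 %n)
/// with the memset removed by the caller once the fold succeeds.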
1805 bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
1806 Instruction *DefI = Def->getMemoryInst();
1807 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1808 if (!MemSet)
1809 // TODO: Could handle zero store to small allocation as well.
1810 return false;
1811 Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1812 if (!StoredConstant || !StoredConstant->isNullValue())
1813 return false;
1814
1815 if (!isRemovable(DefI))
1816 // The memset might be volatile.
1817 return false;
1818
1819 if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
1820 F.hasFnAttribute(Attribute::SanitizeAddress) ||
1821 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
1822 F.getName() == "calloc")
1823 return false;
1824 auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
1825 if (!Malloc)
1826 return false;
1827 auto *InnerCallee = Malloc->getCalledFunction();
1828 if (!InnerCallee)
1829 return false;
1830 LibFunc Func;
1831 if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
1832 Func != LibFunc_malloc)
1833 return false;
1834
1835 auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
1836 // Check for a "br(icmp(ptr, null), truebb, falsebb)" pattern at the end
1837 // of the malloc block.
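// Illustrative example (LLVM IR sketch) of the accepted control flow:
//   %p = call ptr @malloc(i64 %n)
//   %cmp = icmp eq ptr %p, null
//   br i1 %cmp, label %fail, label %memset_bb
// where the memset of %p lives in %memset_bb (the false successor).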
1838 auto *MallocBB = Malloc->getParent(),
1839 *MemsetBB = Memset->getParent();
1840 if (MallocBB == MemsetBB)
1841 return true;
1842 auto *Ptr = Memset->getArgOperand(0);
1843 auto *TI = MallocBB->getTerminator();
1844 ICmpInst::Predicate Pred;
1845 BasicBlock *TrueBB, *FalseBB;
1846 if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
1847 FalseBB)))
1848 return false;
1849 if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB)
1850 return false;
1851 return true;
1852 };
1853
1854 if (Malloc->getOperand(0) != MemSet->getLength())
1855 return false;
1856 if (!shouldCreateCalloc(Malloc, MemSet) ||
1857 !DT.dominates(Malloc, MemSet) ||
1858 !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
1859 return false;
1860 IRBuilder<> IRB(Malloc);
1861 Type *SizeTTy = Malloc->getArgOperand(0)->getType();
1862 auto *Calloc = emitCalloc(ConstantInt::get(SizeTTy, 1),
1863 Malloc->getArgOperand(0), IRB, TLI);
1864 if (!Calloc)
1865 return false;
1866 MemorySSAUpdater Updater(&MSSA);
1867 auto *LastDef =
1868 cast<MemoryDef>(Updater.getMemorySSA()->getMemoryAccess(Malloc));
1869 auto *NewAccess =
1870 Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), LastDef,
1871 LastDef);
1872 auto *NewAccessMD = cast<MemoryDef>(NewAccess);
1873 Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
1874 Updater.removeMemoryAccess(Malloc);
1875 Malloc->replaceAllUsesWith(Calloc);
1876 Malloc->eraseFromParent();
1877 return true;
1878 }
1879
1880 /// \returns true if \p Def is a no-op store, either because it
1881 /// directly stores back a loaded value or stores zero to a calloced object.
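/// Illustrative examples (LLVM IR sketches):
///   %v = load i32, ptr %p
///   store i32 %v, ptr %p             ; stores back the value just loaded
/// or
///   %m = call ptr @calloc(i64 1, i64 4)
///   store i32 0, ptr %m              ; %m is already zero-initialized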
1882 bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
1883 Instruction *DefI = Def->getMemoryInst();
1884 StoreInst *Store = dyn_cast<StoreInst>(DefI);
1885 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1886 Constant *StoredConstant = nullptr;
1887 if (Store)
1888 StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
1889 else if (MemSet)
1890 StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1891 else
1892 return false;
1893
1894 if (!isRemovable(DefI))
1895 return false;
1896
1897 if (StoredConstant) {
1898 Constant *InitC =
1899 getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
1900 // If the clobbering access is LiveOnEntry, no instructions between them
1901 // can modify the memory location.
1902 if (InitC && InitC == StoredConstant)
1903 return MSSA.isLiveOnEntryDef(
1904 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
1905 }
1906
1907 if (!Store)
1908 return false;
1909
1910 if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
1911 if (LoadI->getPointerOperand() == Store->getOperand(1)) {
1912 // Get the defining access for the load.
1913 auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
1914 // Fast path: the defining accesses are the same.
1915 if (LoadAccess == Def->getDefiningAccess())
1916 return true;
1917
1918 // Look through phi accesses. Recursively scan all phi accesses by
1919 // adding them to a worklist. Bail when we run into a memory def that
1920 // does not match LoadAccess.
1921 SetVector<MemoryAccess *> ToCheck;
1922 MemoryAccess *Current =
1923 MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
1924 // We don't want to bail when we run into the store memory def. But,
1925 // the phi access may point to it. So, pretend like we've already
1926 // checked it.
1927 ToCheck.insert(Def);
1928 ToCheck.insert(Current);
1929 // Start at current (1) to simulate already having checked Def.
1930 for (unsigned I = 1; I < ToCheck.size(); ++I) {
1931 Current = ToCheck[I];
1932 if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
1933 // Check all the operands.
1934 for (auto &Use : PhiAccess->incoming_values())
1935 ToCheck.insert(cast<MemoryAccess>(&Use));
1936 continue;
1937 }
1938
1939 // If we found a memory def, bail. This happens when there is an
1940 // unrelated write between the load and the otherwise no-op store.
1941 assert(isa<MemoryDef>(Current) &&
1942 "Only MemoryDefs should reach here.");
1943 // TODO: Skip no alias MemoryDefs that have no aliasing reads.
1944 // We are searching for the definition of the store's destination.
1945 // So, if that is the same definition as the load, then this is a
1946 // noop. Otherwise, fail.
1947 if (LoadAccess != Current)
1948 return false;
1949 }
1950 return true;
1951 }
1952 }
1953
1954 return false;
1955 }
1956
1957 bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
1958 bool Changed = false;
1959 for (auto OI : IOL) {
1960 Instruction *DeadI = OI.first;
1961 MemoryLocation Loc = *getLocForWrite(DeadI);
1962 assert(isRemovable(DeadI) && "Expect only removable instruction");
1963
1964 const Value *Ptr = Loc.Ptr->stripPointerCasts();
1965 int64_t DeadStart = 0;
1966 uint64_t DeadSize = Loc.Size.getValue();
1967 GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
1968 OverlapIntervalsTy &IntervalMap = OI.second;
1969 Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
1970 if (IntervalMap.empty())
1971 continue;
1972 Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
1973 }
1974 return Changed;
1975 }
1976
1977 /// Eliminates writes to locations where the value that is being written
1978 /// is already stored at the same location.
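/// Illustrative example (LLVM IR sketch):
///   store i32 1, ptr %p
///   ...                    ; nothing in between reads or clobbers %p
///   store i32 1, ptr %p    ; writes the value already present and is removed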
1979 bool eliminateRedundantStoresOfExistingValues() {
1980 bool MadeChange = false;
1981 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
1982 "already existing value\n");
1983 for (auto *Def : MemDefs) {
1984 if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
1985 continue;
1986
1987 Instruction *DefInst = Def->getMemoryInst();
1988 auto MaybeDefLoc = getLocForWrite(DefInst);
1989 if (!MaybeDefLoc || !isRemovable(DefInst))
1990 continue;
1991
1992 MemoryDef *UpperDef;
1993 // To conserve compile-time, we avoid walking to the next clobbering def.
1994 // Instead, we just try to get the optimized access, if it exists. DSE
1995 // will try to optimize defs during the earlier traversal.
1996 if (Def->isOptimized())
1997 UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
1998 else
1999 UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
2000 if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
2001 continue;
2002
2003 Instruction *UpperInst = UpperDef->getMemoryInst();
2004 auto IsRedundantStore = [&]() {
2005 if (DefInst->isIdenticalTo(UpperInst))
2006 return true;
2007 if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
2008 if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
2009 // MemSetInst must have a write location.
2010 MemoryLocation UpperLoc = *getLocForWrite(UpperInst);
2011 int64_t InstWriteOffset = 0;
2012 int64_t DepWriteOffset = 0;
2013 auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
2014 InstWriteOffset, DepWriteOffset);
2015 Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
2016 return StoredByte && StoredByte == MemSetI->getOperand(1) &&
2017 OR == OW_Complete;
2018 }
2019 }
2020 return false;
2021 };
2022
2023 if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
2024 continue;
2025 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2026 << '\n');
2027 deleteDeadInstruction(DefInst);
2028 NumRedundantStores++;
2029 MadeChange = true;
2030 }
2031 return MadeChange;
2032 }
2033};
2034
2035 static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
2036 DominatorTree &DT, PostDominatorTree &PDT,
2037 AssumptionCache &AC,
2038 const TargetLibraryInfo &TLI,
2039 const LoopInfo &LI) {
2040 bool MadeChange = false;
2041
2042 MSSA.ensureOptimizedUses();
2043 DSEState State(F, AA, MSSA, DT, PDT, AC, TLI, LI);
2044 // For each store:
2045 for (unsigned I = 0; I < State.MemDefs.size(); I++) {
2046 MemoryDef *KillingDef = State.MemDefs[I];
2047 if (State.SkipStores.count(KillingDef))
2048 continue;
2049 Instruction *KillingI = KillingDef->getMemoryInst();
2050
2051 std::optional<MemoryLocation> MaybeKillingLoc;
2052 if (State.isMemTerminatorInst(KillingI)) {
2053 if (auto KillingLoc = State.getLocForTerminator(KillingI))
2054 MaybeKillingLoc = KillingLoc->first;
2055 } else {
2056 MaybeKillingLoc = State.getLocForWrite(KillingI);
2057 }
2058
2059 if (!MaybeKillingLoc) {
2060 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2061 << *KillingI << "\n");
2062 continue;
2063 }
2064 MemoryLocation KillingLoc = *MaybeKillingLoc;
2065 assert(KillingLoc.Ptr && "KillingLoc should not be null");
2066 const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
2067 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2068 << *KillingDef << " (" << *KillingI << ")\n");
2069
2070 unsigned ScanLimit = MemorySSAScanLimit;
2071 unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2072 unsigned PartialLimit = MemorySSAPartialStoreLimit;
2073 // Worklist of MemoryAccesses that may be killed by KillingDef.
2074 SetVector<MemoryAccess *> ToCheck;
2075 ToCheck.insert(KillingDef->getDefiningAccess());
2076
2077 bool Shortend = false;
2078 bool IsMemTerm = State.isMemTerminatorInst(KillingI);
2079 // Check if MemoryAccesses in the worklist are killed by KillingDef.
2080 for (unsigned I = 0; I < ToCheck.size(); I++) {
2081 MemoryAccess *Current = ToCheck[I];
2082 if (State.SkipStores.count(Current))
2083 continue;
2084
2085 std::optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
2086 KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
2087 WalkerStepLimit, IsMemTerm, PartialLimit);
2088
2089 if (!MaybeDeadAccess) {
2090 LLVM_DEBUG(dbgs() << " finished walk\n");
2091 continue;
2092 }
2093
2094 MemoryAccess *DeadAccess = *MaybeDeadAccess;
2095 LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2096 if (isa<MemoryPhi>(DeadAccess)) {
2097 LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2098 for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
2099 MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
2100 BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2101 BasicBlock *PhiBlock = DeadAccess->getBlock();
2102
2103 // We only consider incoming MemoryAccesses that come before the
2104 // MemoryPhi. Otherwise we could discover candidates that do not
2105 // strictly dominate our starting def.
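// Illustrative example: for a loop-header MemoryPhi such as
//   %hdr = MemoryPhi({%preheader, 1}, {%latch, 3})
// only the incoming access from %preheader (which precedes the phi) is added
// to the worklist; the access arriving via the back edge from %latch is
// skipped.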
2106 if (State.PostOrderNumbers[IncomingBlock] >
2107 State.PostOrderNumbers[PhiBlock])
2108 ToCheck.insert(IncomingAccess);
2109 }
2110 continue;
2111 }
2112 auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
2113 Instruction *DeadI = DeadDefAccess->getMemoryInst();
2114 LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
2115 ToCheck.insert(DeadDefAccess->getDefiningAccess());
2116 NumGetDomMemoryDefPassed++;
2117
2118 if (!DebugCounter::shouldExecute(MemorySSACounter))
2119 continue;
2120
2121 MemoryLocation DeadLoc = *State.getLocForWrite(DeadI);
2122
2123 if (IsMemTerm) {
2124 const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
2125 if (KillingUndObj != DeadUndObj)
2126 continue;
2127 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2128 << "\n KILLER: " << *KillingI << '\n');
2129 State.deleteDeadInstruction(DeadI);
2130 ++NumFastStores;
2131 MadeChange = true;
2132 } else {
2133 // Check if DeadI overwrites KillingI.
2134 int64_t KillingOffset = 0;
2135 int64_t DeadOffset = 0;
2136 OverwriteResult OR = State.isOverwrite(
2137 KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
2138 if (OR == OW_MaybePartial) {
2139 auto Iter = State.IOLs.insert(
2140 std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
2141 DeadI->getParent(), InstOverlapIntervalsTy()));
2142 auto &IOL = Iter.first->second;
2143 OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
2144 DeadOffset, DeadI, IOL);
2145 }
2146
2147 if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2148 auto *DeadSI = dyn_cast<StoreInst>(DeadI);
2149 auto *KillingSI = dyn_cast<StoreInst>(KillingI);
2150 // We are re-using tryToMergePartialOverlappingStores, which requires
2151 // DeadSI to dominate KillingSI.
2152 // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
2153 if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
2154 if (Constant *Merged = tryToMergePartialOverlappingStores(
2155 KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
2156 State.BatchAA, &DT)) {
2157
2158 // Update stored value of earlier store to merged constant.
2159 DeadSI->setOperand(0, Merged);
2160 ++NumModifiedStores;
2161 MadeChange = true;
2162
2163 Shortend = true;
2164 // Remove killing store and remove any outstanding overlap
2165 // intervals for the updated store.
2166 State.deleteDeadInstruction(KillingSI);
2167 auto I = State.IOLs.find(DeadSI->getParent());
2168 if (I != State.IOLs.end())
2169 I->second.erase(DeadSI);
2170 break;
2171 }
2172 }
2173 }
2174
2175 if (OR == OW_Complete) {
2176 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2177 << "\n KILLER: " << *KillingI << '\n');
2178 State.deleteDeadInstruction(DeadI);
2179 ++NumFastStores;
2180 MadeChange = true;
2181 }
2182 }
2183 }
2184
2185 // Check if the store is a no-op.
2186 if (!Shortend && State.storeIsNoop(KillingDef, KillingUndObj)) {
2187 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
2188 << '\n');
2189 State.deleteDeadInstruction(KillingI);
2190 NumRedundantStores++;
2191 MadeChange = true;
2192 continue;
2193 }
2194
2195 // Can we form a calloc from a memset/malloc pair?
2196 if (!Shortend && State.tryFoldIntoCalloc(KillingDef, KillingUndObj)) {
2197 LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2198 << " DEAD: " << *KillingI << '\n');
2199 State.deleteDeadInstruction(KillingI);
2200 MadeChange = true;
2201 continue;
2202 }
2203 }
2204
2205 if (EnablePartialOverwriteTracking)
2206 for (auto &KV : State.IOLs)
2207 MadeChange |= State.removePartiallyOverlappedStores(KV.second);
2208
2209 MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2210 MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2211 return MadeChange;
2212}
2213} // end anonymous namespace
2214
2215//===----------------------------------------------------------------------===//
2216// DSE Pass
2217//===----------------------------------------------------------------------===//
2218 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2219 AliasAnalysis &AA = AM.getResult<AAManager>(F);
2220 const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2221 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
2222 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2223 PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
2224 AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
2225 LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
2226
2227 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, AC, TLI, LI);
2228
2229#ifdef LLVM_ENABLE_STATS
2230 if (AreStatisticsEnabled())
2231 for (auto &I : instructions(F))
2232 NumRemainingStores += isa<StoreInst>(&I);
2233#endif
2234
2235 if (!Changed)
2236 return PreservedAnalyses::all();
2237
2238 PreservedAnalyses PA;
2239 PA.preserveSet<CFGAnalyses>();
2240 PA.preserve<MemorySSAAnalysis>();
2241 PA.preserve<LoopAnalysis>();
2242 return PA;
2243}
2244
2245namespace {
2246
2247/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
2248class DSELegacyPass : public FunctionPass {
2249public:
2250 static char ID; // Pass identification, replacement for typeid
2251
2252 DSELegacyPass() : FunctionPass(ID) {
2253 initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
2254 }
2255
2256 bool runOnFunction(Function &F) override {
2257 if (skipFunction(F))
2258 return false;
2259
2260 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2261 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2262 const TargetLibraryInfo &TLI =
2263 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2264 MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
2265 PostDominatorTree &PDT =
2266 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
2267 AssumptionCache &AC =
2268 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
2269 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2270
2271 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, AC, TLI, LI);
2272
2273#ifdef LLVM_ENABLE_STATS
2274 if (AreStatisticsEnabled())
2275 for (auto &I : instructions(F))
2276 NumRemainingStores += isa<StoreInst>(&I);
2277#endif
2278
2279 return Changed;
2280 }
2281
2282 void getAnalysisUsage(AnalysisUsage &AU) const override {
2283 AU.setPreservesCFG();
2296 }
2297};
2298
2299} // end anonymous namespace
2300
2301char DSELegacyPass::ID = 0;
2302
2303INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
2304 false)
2314INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
2315 false)
2316
2317 FunctionPass *llvm::createDeadStoreEliminationPass() {
2318 return new DSELegacyPass();
2319}