1//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The code below implements dead store elimination using MemorySSA. It uses
10// the following general approach: given a MemoryDef, walk upwards to find
11// clobbering MemoryDefs that may be killed by the starting def. Then check
12// that there are no uses that may read the location of the original MemoryDef
13// in between both MemoryDefs. A bit more concretely:
14//
15// For all MemoryDefs StartDef:
16// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17// upwards.
18// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19// checking all uses starting at MaybeDeadAccess and walking until we see
20// StartDef.
21// 3. For each found CurrentDef, check that:
22// 1. There are no barrier instructions between CurrentDef and StartDef (like
23// throws or stores with ordering constraints).
24// 2. StartDef is executed whenever CurrentDef is executed.
25// 3. StartDef completely overwrites CurrentDef.
26// 4. Erase CurrentDef from the function and MemorySSA.
27//
28//===----------------------------------------------------------------------===//
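//
// Illustrative example (added for exposition; not taken from the upstream
// sources): in the IR below the first store is a MemoryDef that is completely
// overwritten by the second store with no intervening read, so DSE deletes it.
//
//   define void @f(ptr %p) {
//     store i32 1, ptr %p        ; dead: killed by the following store
//     store i32 2, ptr %p
//     ret void
//   }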
29
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/MapVector.h"
35#include "llvm/ADT/SetVector.h"
38#include "llvm/ADT/Statistic.h"
39#include "llvm/ADT/StringRef.h"
52#include "llvm/IR/Argument.h"
53#include "llvm/IR/BasicBlock.h"
54#include "llvm/IR/Constant.h"
55#include "llvm/IR/Constants.h"
56#include "llvm/IR/DataLayout.h"
57#include "llvm/IR/DebugInfo.h"
58#include "llvm/IR/Dominators.h"
59#include "llvm/IR/Function.h"
60#include "llvm/IR/IRBuilder.h"
62#include "llvm/IR/InstrTypes.h"
63#include "llvm/IR/Instruction.h"
66#include "llvm/IR/Module.h"
67#include "llvm/IR/PassManager.h"
69#include "llvm/IR/Value.h"
72#include "llvm/Support/Debug.h"
79#include <algorithm>
80#include <cassert>
81#include <cstdint>
82#include <iterator>
83#include <map>
84#include <optional>
85#include <utility>
86
87using namespace llvm;
88using namespace PatternMatch;
89
90#define DEBUG_TYPE "dse"
91
92STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
93STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
94STATISTIC(NumFastStores, "Number of stores deleted");
95STATISTIC(NumFastOther, "Number of other instrs removed");
96STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
97STATISTIC(NumModifiedStores, "Number of stores modified");
98STATISTIC(NumCFGChecks, "Number of stores checked against the CFG");
99STATISTIC(NumCFGTries, "Number of CFG-based elimination attempts");
100STATISTIC(NumCFGSuccess, "Number of successful CFG-based eliminations");
101STATISTIC(NumGetDomMemoryDefPassed,
102 "Number of times a valid candidate is returned from getDomMemoryDef");
103STATISTIC(NumDomMemDefChecks,
104 "Number of iterations checking for reads in getDomMemoryDef");
105
106DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
107 "Controls which MemoryDefs are eliminated.");
108
109static cl::opt<bool>
110EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
111 cl::init(true), cl::Hidden,
112 cl::desc("Enable partial-overwrite tracking in DSE"));
113
114static cl::opt<bool>
115EnablePartialStoreMerging("enable-dse-partial-store-merging",
116 cl::init(true), cl::Hidden,
117 cl::desc("Enable partial store merging in DSE"));
118
119static cl::opt<unsigned>
120 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
121 cl::desc("The number of memory instructions to scan for "
122 "dead store elimination (default = 150)"));
123static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
124 "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
125 cl::desc("The maximum number of steps while walking upwards to find "
126 "MemoryDefs that may be killed (default = 90)"));
127
128static cl::opt<unsigned> MemorySSAPartialStoreLimit(
129 "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
130 cl::desc("The maximum number of candidates that only partially overwrite the "
131 "killing MemoryDef to consider"
132 " (default = 5)"));
133
134static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
135 "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
136 cl::desc("The number of MemoryDefs we consider as candidates to eliminate "
137 "other stores per basic block (default = 5000)"));
138
139static cl::opt<unsigned> MemorySSASameBBStepCost(
140 "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
141 cl::desc(
142 "The cost of a step in the same basic block as the killing MemoryDef "
143 "(default = 1)"));
144
145static cl::opt<unsigned>
146 MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
147 cl::Hidden,
148 cl::desc("The cost of a step in a different basic "
149 "block than the killing MemoryDef "
150 "(default = 5)"));
151
152static cl::opt<unsigned> MemorySSAPathCheckLimit(
153 "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
154 cl::desc("The maximum number of blocks to check when trying to prove that "
155 "all paths to an exit go through a killing block (default = 50)"));
156
157// This flag allows or disallows DSE to optimize MemorySSA during its
158// traversal. Note that DSE optimizing MemorySSA may impact other passes
159// downstream of the DSE invocation and can lead to issues not being
160// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
161// those cases, the flag can be used to check if DSE's MemorySSA optimizations
162// impact follow-up passes.
163static cl::opt<bool>
164 OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
165 cl::desc("Allow DSE to optimize memory accesses."));
166
167//===----------------------------------------------------------------------===//
168// Helper functions
169//===----------------------------------------------------------------------===//
170using OverlapIntervalsTy = std::map<int64_t, int64_t>;
171using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
172
173/// Returns true if the end of this instruction can be safely shortened in
174/// length.
175static bool isShortenableAtTheEnd(Instruction *I) {
176 // Don't shorten stores for now
177 if (isa<StoreInst>(I))
178 return false;
179
180 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
181 switch (II->getIntrinsicID()) {
182 default: return false;
183 case Intrinsic::memset:
184 case Intrinsic::memcpy:
185 case Intrinsic::memcpy_element_unordered_atomic:
186 case Intrinsic::memset_element_unordered_atomic:
187 // Do shorten memory intrinsics.
188 // FIXME: Add memmove if it's also safe to transform.
189 return true;
190 }
191 }
192
193 // Don't shorten libcalls for now.
194
195 return false;
196}
197
198/// Returns true if the beginning of this instruction can be safely shortened
199/// in length.
200static bool isShortenableAtTheBeginning(Instruction *I) {
201 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
202 // easily done by offsetting the source address.
203 return isa<AnyMemSetInst>(I);
204}
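
// Illustrative example for the two shortening predicates above (added for
// exposition; not from the upstream sources): the tail of the memset below is
// overwritten by the store, so the memset can be shrunk from 16 bytes to 8
// bytes instead of being removed entirely.
//
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 false)
//   %q = getelementptr inbounds i8, ptr %p, i64 8
//   store i64 -1, ptr %q          ; overwrites bytes [8, 16)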
205
206static std::optional<TypeSize> getPointerSize(const Value *V,
207 const DataLayout &DL,
208 const TargetLibraryInfo &TLI,
209 const Function *F) {
210 uint64_t Size;
211 ObjectSizeOpts Opts;
212 Opts.NullIsUnknownSize = NullPointerIsDefined(F);
213
214 if (getObjectSize(V, Size, DL, &TLI, Opts))
215 return TypeSize::getFixed(Size);
216 return std::nullopt;
217}
218
219namespace {
220
221enum OverwriteResult {
222 OW_Begin,
223 OW_Complete,
224 OW_End,
225 OW_PartialEarlierWithFullLater,
226 OW_MaybePartial,
227 OW_None,
228 OW_Unknown
229};
230
231} // end anonymous namespace
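
// For illustration (not from the upstream sources), assuming all accesses are
// to the same underlying object:
//   dead store [0, 8),  killing store [0, 8)  -> OW_Complete
//   dead store [0, 8),  killing store [4, 12) -> OW_End
//   dead store [4, 12), killing store [0, 8)  -> OW_Begin
//   dead store [0, 8),  killing store [2, 6)  -> OW_PartialEarlierWithFullLater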
232
233/// Check if two instructions are masked stores that completely
234/// overwrite one another. More specifically, \p KillingI has to
235/// overwrite \p DeadI.
236static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
237 const Instruction *DeadI,
238 BatchAAResults &AA) {
239 const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
240 const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
241 if (KillingII == nullptr || DeadII == nullptr)
242 return OW_Unknown;
243 if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
244 return OW_Unknown;
245 if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
246 // Type size.
247 VectorType *KillingTy =
248 cast<VectorType>(KillingII->getArgOperand(0)->getType());
249 VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType());
250 if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
251 return OW_Unknown;
252 // Element count.
253 if (KillingTy->getElementCount() != DeadTy->getElementCount())
254 return OW_Unknown;
255 // Pointers.
256 Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
257 Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
258 if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
259 return OW_Unknown;
260 // Masks.
261 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
262 if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
263 return OW_Unknown;
264 return OW_Complete;
265 }
266 return OW_Unknown;
267}
268
269/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
270/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
271/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
272/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
273/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
274/// overwritten by a killing (smaller) store which doesn't write outside the big
275/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
276/// NOTE: This function must only be called if both \p KillingLoc and \p
277/// DeadLoc belong to the same underlying object with valid \p KillingOff and
278/// \p DeadOff.
279static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
280 const MemoryLocation &DeadLoc,
281 int64_t KillingOff, int64_t DeadOff,
282 Instruction *DeadI,
283 InstOverlapIntervalsTy &IOL) {
284 const uint64_t KillingSize = KillingLoc.Size.getValue();
285 const uint64_t DeadSize = DeadLoc.Size.getValue();
286 // We may now overlap, although the overlap is not complete. There might also
287 // be other incomplete overlaps, and together, they might cover the complete
288 // dead store.
289 // Note: The correctness of this logic depends on the fact that this function
290 // is not even called providing DepWrite when there are any intervening reads.
291 if (EnablePartialOverwriteTracking &&
292 KillingOff < int64_t(DeadOff + DeadSize) &&
293 int64_t(KillingOff + KillingSize) >= DeadOff) {
294
295 // Insert our part of the overlap into the map.
296 auto &IM = IOL[DeadI];
297 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
298 << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
299 << KillingOff << ", " << int64_t(KillingOff + KillingSize)
300 << ")\n");
301
302 // Make sure that we only insert non-overlapping intervals and combine
303 // adjacent intervals. The intervals are stored in the map with the ending
304 // offset as the key (in the half-open sense) and the starting offset as
305 // the value.
306 int64_t KillingIntStart = KillingOff;
307 int64_t KillingIntEnd = KillingOff + KillingSize;
308
309 // Find any intervals ending at, or after, KillingIntStart which start
310 // before KillingIntEnd.
311 auto ILI = IM.lower_bound(KillingIntStart);
312 if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
313 // This existing interval is overlapped with the current store somewhere
314 // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
315 // intervals and adjusting our start and end.
316 KillingIntStart = std::min(KillingIntStart, ILI->second);
317 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
318 ILI = IM.erase(ILI);
319
320 // Continue erasing and adjusting our end in case other previous
321 // intervals are also overlapped with the current store.
322 //
323 // |--- dead 1 ---| |--- dead 2 ---|
324 // |------- killing---------|
325 //
326 while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
327 assert(ILI->second > KillingIntStart && "Unexpected interval");
328 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
329 ILI = IM.erase(ILI);
330 }
331 }
332
333 IM[KillingIntEnd] = KillingIntStart;
334
335 ILI = IM.begin();
336 if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
337 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
338 << DeadOff << ", " << int64_t(DeadOff + DeadSize)
339 << ") Composite KillingLoc [" << ILI->second << ", "
340 << ILI->first << ")\n");
341 ++NumCompletePartials;
342 return OW_Complete;
343 }
344 }
345
346 // Check for a dead store which writes to all the memory locations that
347 // the killing store writes to.
348 if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
349 int64_t(DeadOff + DeadSize) > KillingOff &&
350 uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
351 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead store [" << DeadOff
352 << ", " << int64_t(DeadOff + DeadSize)
353 << ") by a killing store [" << KillingOff << ", "
354 << int64_t(KillingOff + KillingSize) << ")\n");
355 // TODO: Maybe come up with a better name?
356 return OW_PartialEarlierWithFullLater;
357 }
358
359 // Another interesting case is if the killing store overwrites the end of the
360 // dead store.
361 //
362 // |--dead--|
363 // |-- killing --|
364 //
365 // In this case we may want to trim the size of the dead store to avoid
366 // generating stores to addresses which will definitely be overwritten by the
367 // killing store.
368 if (!EnablePartialOverwriteTracking &&
369 (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
370 int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
371 return OW_End;
372
373 // Finally, we also need to check if the killing store overwrites the
374 // beginning of the dead store.
375 //
376 // |--dead--|
377 // |-- killing --|
378 //
379 // In this case we may want to move the destination address and trim the size
380 // of the dead store to avoid generating stores to addresses which will
381 // definitely be overwritten by the killing store.
382 if (!EnablePartialOverwriteTracking &&
383 (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
384 assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
385 "Expect to be handled as OW_Complete");
386 return OW_Begin;
387 }
388 // Otherwise, they don't completely overlap.
389 return OW_Unknown;
390}
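
// Illustrative example (added for exposition; not from the upstream sources):
// two killing stores that each overwrite half of an earlier 8-byte store are
// merged in the interval map above, and together they report OW_Complete for
// the earlier store.
//
//   store i64 0, ptr %p           ; dead once both halves are rewritten
//   store i32 1, ptr %p           ; covers bytes [0, 4)
//   %q = getelementptr inbounds i8, ptr %p, i64 4
//   store i32 2, ptr %q           ; covers bytes [4, 8)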
391
392/// Returns true if the memory which is accessed by the second instruction is not
393/// modified between the first and the second instruction.
394/// Precondition: Second instruction must be dominated by the first
395/// instruction.
396static bool
397memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
398 BatchAAResults &AA, const DataLayout &DL,
399 DominatorTree *DT) {
400 // Do a backwards scan through the CFG from SecondI to FirstI. Look for
401 // instructions which can modify the memory location accessed by SecondI.
402 //
403 // While doing the walk keep track of the address to check. It might be
404 // different in different basic blocks due to PHI translation.
405 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
406 SmallVector<BlockAddressPair, 16> WorkList;
407 // Keep track of the address we visited each block with. Bail out if we
408 // visit a block with different addresses.
409 DenseMap<BasicBlock *, Value *> Visited;
410
411 BasicBlock::iterator FirstBBI(FirstI);
412 ++FirstBBI;
413 BasicBlock::iterator SecondBBI(SecondI);
414 BasicBlock *FirstBB = FirstI->getParent();
415 BasicBlock *SecondBB = SecondI->getParent();
416 MemoryLocation MemLoc;
417 if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
418 MemLoc = MemoryLocation::getForDest(MemSet);
419 else
420 MemLoc = MemoryLocation::get(SecondI);
421
422 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
423
424 // Start checking the SecondBB.
425 WorkList.push_back(
426 std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
427 bool isFirstBlock = true;
428
429 // Check all blocks going backward until we reach the FirstBB.
430 while (!WorkList.empty()) {
431 BlockAddressPair Current = WorkList.pop_back_val();
432 BasicBlock *B = Current.first;
433 PHITransAddr &Addr = Current.second;
434 Value *Ptr = Addr.getAddr();
435
436 // Ignore instructions before FirstI if this is the FirstBB.
437 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
438
439 BasicBlock::iterator EI;
440 if (isFirstBlock) {
441 // Ignore instructions after SecondI if this is the first visit of SecondBB.
442 assert(B == SecondBB && "first block is not the store block");
443 EI = SecondBBI;
444 isFirstBlock = false;
445 } else {
446 // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
447 // In this case we also have to look at instructions after SecondI.
448 EI = B->end();
449 }
450 for (; BI != EI; ++BI) {
451 Instruction *I = &*BI;
452 if (I->mayWriteToMemory() && I != SecondI)
453 if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
454 return false;
455 }
456 if (B != FirstBB) {
457 assert(B != &FirstBB->getParent()->getEntryBlock() &&
458 "Should not hit the entry block because SI must be dominated by LI");
459 for (BasicBlock *Pred : predecessors(B)) {
460 PHITransAddr PredAddr = Addr;
461 if (PredAddr.needsPHITranslationFromBlock(B)) {
462 if (!PredAddr.isPotentiallyPHITranslatable())
463 return false;
464 if (!PredAddr.translateValue(B, Pred, DT, false))
465 return false;
466 }
467 Value *TranslatedPtr = PredAddr.getAddr();
468 auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
469 if (!Inserted.second) {
470 // We already visited this block before. If it was with a different
471 // address - bail out!
472 if (TranslatedPtr != Inserted.first->second)
473 return false;
474 // ... otherwise just skip it.
475 continue;
476 }
477 WorkList.push_back(std::make_pair(Pred, PredAddr));
478 }
479 }
480 }
481 return true;
482}
483
484static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
485 uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
486 uint64_t NewSizeInBits, bool IsOverwriteEnd) {
487 const DataLayout &DL = Inst->getModule()->getDataLayout();
488 uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
489 uint64_t DeadSliceOffsetInBits =
490 OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
491 auto SetDeadFragExpr = [](DbgAssignIntrinsic *DAI,
492 DIExpression::FragmentInfo DeadFragment) {
493 // createFragmentExpression expects an offset relative to the existing
494 // fragment offset if there is one.
495 uint64_t RelativeOffset = DeadFragment.OffsetInBits -
496 DAI->getExpression()
497 ->getFragmentInfo()
498 .value_or(DIExpression::FragmentInfo(0, 0))
499 .OffsetInBits;
500 if (auto NewExpr = DIExpression::createFragmentExpression(
501 DAI->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
502 DAI->setExpression(*NewExpr);
503 return;
504 }
505 // Failed to create a fragment expression for this so discard the value,
506 // making this a kill location.
507 auto *Expr = *DIExpression::createFragmentExpression(
508 DIExpression::get(DAI->getContext(), std::nullopt),
509 DeadFragment.OffsetInBits, DeadFragment.SizeInBits);
510 DAI->setExpression(Expr);
511 DAI->setKillLocation();
512 };
513
514 // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
515 // link to any instructions. Created in the loop below (once).
516 DIAssignID *LinkToNothing = nullptr;
517 LLVMContext &Ctx = Inst->getContext();
518 auto GetDeadLink = [&Ctx, &LinkToNothing]() {
519 if (!LinkToNothing)
520 LinkToNothing = DIAssignID::getDistinct(Ctx);
521 return LinkToNothing;
522 };
523
524 // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
525 // overlapping dbg.assign intrinsic. The loop invalidates the iterators
526 // returned by getAssignmentMarkers so save a copy of the markers to iterate
527 // over.
528 auto LinkedRange = at::getAssignmentMarkers(Inst);
529 SmallVector<DbgAssignIntrinsic *> Linked(LinkedRange.begin(),
530 LinkedRange.end());
531 for (auto *DAI : Linked) {
532 std::optional<DIExpression::FragmentInfo> NewFragment;
533 if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
534 DeadSliceSizeInBits, DAI,
535 NewFragment) ||
536 !NewFragment) {
537 // We couldn't calculate the intersecting fragment for some reason. Be
538 // cautious and unlink the whole assignment from the store.
539 DAI->setKillAddress();
540 DAI->setAssignId(GetDeadLink());
541 continue;
542 }
543 // No intersect.
544 if (NewFragment->SizeInBits == 0)
545 continue;
546
547 // Fragments overlap: insert a new dbg.assign for this dead part.
548 auto *NewAssign = cast<DbgAssignIntrinsic>(DAI->clone());
549 NewAssign->insertAfter(DAI);
550 NewAssign->setAssignId(GetDeadLink());
551 if (NewFragment)
552 SetDeadFragExpr(NewAssign, *NewFragment);
553 NewAssign->setKillAddress();
554 }
555}
556
557static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
558 uint64_t &DeadSize, int64_t KillingStart,
559 uint64_t KillingSize, bool IsOverwriteEnd) {
560 auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
561 Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
562
563 // We assume that memset/memcpy operates in chunks of the "largest" native
564 // type size and aligned on the same value. That means optimal start and size
565 // of memset/memcpy should be modulo of preferred alignment of that type. That
566 // is, there is no sense in trying to reduce store size any further since any
567 // "extra" stores come for free anyway.
568 // On the other hand, maximum alignment we can achieve is limited by alignment
569 // of initial store.
570
571 // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
572 // "largest" native type.
573 // Note: What is the proper way to get that value?
574 // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
575 // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
576
577 int64_t ToRemoveStart = 0;
578 uint64_t ToRemoveSize = 0;
579 // Compute start and size of the region to remove. Make sure 'PrefAlign' is
580 // maintained on the remaining store.
581 if (IsOverwriteEnd) {
582 // Calculate required adjustment for 'KillingStart' in order to keep
583 // remaining store size aligned on 'PrefAlign'.
584 uint64_t Off =
585 offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
586 ToRemoveStart = KillingStart + Off;
587 if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
588 return false;
589 ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
590 } else {
591 ToRemoveStart = DeadStart;
592 assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
593 "Not overlapping accesses?");
594 ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
595 // Calculate required adjustment for 'ToRemoveSize' in order to keep
596 // start of the remaining store aligned on 'PrefAlign'.
597 uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
598 if (Off != 0) {
599 if (ToRemoveSize <= (PrefAlign.value() - Off))
600 return false;
601 ToRemoveSize -= PrefAlign.value() - Off;
602 }
603 assert(isAligned(PrefAlign, ToRemoveSize) &&
604 "Should preserve selected alignment");
605 }
606
607 assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
608 assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
609
610 uint64_t NewSize = DeadSize - ToRemoveSize;
611 if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
612 // When shortening an atomic memory intrinsic, the newly shortened
613 // length must remain an integer multiple of the element size.
614 const uint32_t ElementSize = AMI->getElementSizeInBytes();
615 if (0 != NewSize % ElementSize)
616 return false;
617 }
618
619 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
620 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
621 << "\n KILLER [" << ToRemoveStart << ", "
622 << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
623
624 Value *DeadWriteLength = DeadIntrinsic->getLength();
625 Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
626 DeadIntrinsic->setLength(TrimmedLength);
627 DeadIntrinsic->setDestAlignment(PrefAlign);
628
629 Value *OrigDest = DeadIntrinsic->getRawDest();
630 if (!IsOverwriteEnd) {
631 Value *Indices[1] = {
632 ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
633 Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
634 Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "", DeadI);
635 NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
636 DeadIntrinsic->setDest(NewDestGEP);
637 }
638
639 // Update attached dbg.assign intrinsics. Assume 8-bit byte.
640 shortenAssignment(DeadI, OrigDest, DeadStart * 8, DeadSize * 8, NewSize * 8,
641 IsOverwriteEnd);
642
643 // Finally update start and size of dead access.
644 if (!IsOverwriteEnd)
645 DeadStart += ToRemoveSize;
646 DeadSize = NewSize;
647
648 return true;
649}
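
// Illustrative example of the rewrite performed above (added for exposition;
// not from the upstream sources): shortening the end of a 32-byte memset whose
// last 16 bytes are later overwritten.
//
//   Before:
//     call void @llvm.memset.p0.i64(ptr align 8 %p, i8 0, i64 32, i1 false)
//     %t = getelementptr inbounds i8, ptr %p, i64 16
//     store i128 -1, ptr %t       ; overwrites bytes [16, 32)
//   After (only the length changes when trimming the end):
//     call void @llvm.memset.p0.i64(ptr align 8 %p, i8 0, i64 16, i1 false)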
650
651static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
652 int64_t &DeadStart, uint64_t &DeadSize) {
653 if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
654 return false;
655
656 OverlapIntervalsTy::iterator OII = --IntervalMap.end();
657 int64_t KillingStart = OII->second;
658 uint64_t KillingSize = OII->first - KillingStart;
659
660 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
661
662 if (KillingStart > DeadStart &&
663 // Note: "KillingStart - DeadStart" is known to be positive due to
664 // preceding check.
665 (uint64_t)(KillingStart - DeadStart) < DeadSize &&
666 // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
667 // be non negative due to preceding checks.
668 KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
669 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
670 true)) {
671 IntervalMap.erase(OII);
672 return true;
673 }
674 }
675 return false;
676}
677
678static bool tryToShortenBegin(Instruction *DeadI,
679 OverlapIntervalsTy &IntervalMap,
680 int64_t &DeadStart, uint64_t &DeadSize) {
681 if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
682 return false;
683
684 OverlapIntervalsTy::iterator OII = IntervalMap.begin();
685 int64_t KillingStart = OII->second;
686 uint64_t KillingSize = OII->first - KillingStart;
687
688 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
689
690 if (KillingStart <= DeadStart &&
691 // Note: "DeadStart - KillingStart" is known to be non negative due to
692 // preceding check.
693 KillingSize > (uint64_t)(DeadStart - KillingStart)) {
694 // Note: "KillingSize - (uint64_t)(DeadStart - KillingStart)" is known to
695 // be positive due to preceding checks.
696 assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
697 "Should have been handled as OW_Complete");
698 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
699 false)) {
700 IntervalMap.erase(OII);
701 return true;
702 }
703 }
704 return false;
705}
706
707static Constant *
708tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
709 int64_t KillingOffset, int64_t DeadOffset,
710 const DataLayout &DL, BatchAAResults &AA,
711 DominatorTree *DT) {
712
713 if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
714 DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
715 KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
716 DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
717 memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
718 // If the store we find is:
719 // a) partially overwritten by the store to 'Loc'
720 // b) the killing store is fully contained in the dead one and
721 // c) they both have a constant value
722 // d) none of the two stores need padding
723 // Merge the two stores, replacing the dead store's value with a
724 // merge of both values.
725 // TODO: Deal with other constant types (vectors, etc), and probably
726 // some mem intrinsics (if needed)
727
728 APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
729 APInt KillingValue =
730 cast<ConstantInt>(KillingI->getValueOperand())->getValue();
731 unsigned KillingBits = KillingValue.getBitWidth();
732 assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
733 KillingValue = KillingValue.zext(DeadValue.getBitWidth());
734
735 // Offset of the smaller store inside the larger store
736 unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
737 unsigned LShiftAmount =
738 DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
739 : BitOffsetDiff;
740 APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
741 LShiftAmount + KillingBits);
742 // Clear the bits we'll be replacing, then OR with the smaller
743 // store, shifted appropriately.
744 APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
745 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
746 << "\n Killing: " << *KillingI
747 << "\n Merged Value: " << Merged << '\n');
748 return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
749 }
750 return nullptr;
751}
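
// Illustrative example (added for exposition; not from the upstream sources;
// little-endian layout assumed): an i32 store of 0 followed by an i8 store of
// 1 to the same address merges into a single i32 store of 1; the i8 store can
// then be removed.
//
//   store i32 0, ptr %p
//   store i8 1, ptr %p            ; killing store, fully contained in the i32
//   ; ==> store i32 1, ptr %p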
752
753namespace {
754// Returns true if \p I is an intrinsic that does not read or write memory.
755bool isNoopIntrinsic(Instruction *I) {
756 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
757 switch (II->getIntrinsicID()) {
758 case Intrinsic::lifetime_start:
759 case Intrinsic::lifetime_end:
760 case Intrinsic::invariant_end:
761 case Intrinsic::launder_invariant_group:
762 case Intrinsic::assume:
763 return true;
764 case Intrinsic::dbg_declare:
765 case Intrinsic::dbg_label:
766 case Intrinsic::dbg_value:
767 llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
768 default:
769 return false;
770 }
771 }
772 return false;
773}
774
775// Check if we can ignore \p D for DSE.
776bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
777 Instruction *DI = D->getMemoryInst();
778 // Calls that only access inaccessible memory cannot read or write any memory
779 // locations we consider for elimination.
780 if (auto *CB = dyn_cast<CallBase>(DI))
781 if (CB->onlyAccessesInaccessibleMemory())
782 return true;
783
784 // We can eliminate stores to locations not visible to the caller across
785 // throwing instructions.
786 if (DI->mayThrow() && !DefVisibleToCaller)
787 return true;
788
789 // We can remove the dead stores, irrespective of the fence and its ordering
790 // (release/acquire/seq_cst). Fences only constrain the ordering of
791 // already visible stores, it does not make a store visible to other
792 // threads. So, skipping over a fence does not change a store from being
793 // dead.
794 if (isa<FenceInst>(DI))
795 return true;
796
797 // Skip intrinsics that do not really read or modify memory.
798 if (isNoopIntrinsic(DI))
799 return true;
800
801 return false;
802}
803
804struct DSEState {
805 Function &F;
806 AliasAnalysis &AA;
807 EarliestEscapeInfo EI;
808
809 /// The single BatchAA instance that is used to cache AA queries. It will
810 /// not be invalidated over the whole run. This is safe, because:
811 /// 1. Only memory writes are removed, so the alias cache for memory
812 /// locations remains valid.
813 /// 2. No new instructions are added (only instructions removed), so cached
814 /// information for a deleted value cannot be accessed by a re-used new
815 /// value pointer.
816 BatchAAResults BatchAA;
817
818 MemorySSA &MSSA;
819 DominatorTree &DT;
820 PostDominatorTree &PDT;
821 const TargetLibraryInfo &TLI;
822 const DataLayout &DL;
823 const LoopInfo &LI;
824
825 // Whether the function contains any irreducible control flow, useful for
826 // being accurately able to detect loops.
827 bool ContainsIrreducibleLoops;
828
829 // All MemoryDefs that potentially could kill other MemDefs.
830 SmallVector<MemoryDef *, 64> MemDefs;
831 // Any that should be skipped as they are already deleted
832 SmallPtrSet<MemoryAccess *, 4> SkipStores;
833 // Keep track whether a given object is captured before return or not.
834 DenseMap<const Value *, bool> CapturedBeforeReturn;
835 // Keep track of all of the objects that are invisible to the caller after
836 // the function returns.
837 DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
838 // Keep track of blocks with throwing instructions not modeled in MemorySSA.
839 SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
840 // Post-order numbers for each basic block. Used to figure out if memory
841 // accesses are executed before another access.
842 DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
843
844 /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
845 /// basic block.
846 MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
847 // Check if there are root nodes that are terminated by UnreachableInst.
848 // Those roots pessimize post-dominance queries. If there are such roots,
849 // fall back to CFG scan starting from all non-unreachable roots.
850 bool AnyUnreachableExit;
851
852 // Whether or not we should iterate on removing dead stores at the end of the
853 // function due to removing a store causing a previously captured pointer to
854 // no longer be captured.
855 bool ShouldIterateEndOfFunctionDSE;
856
857 // Class contains self-reference, make sure it's not copied/moved.
858 DSEState(const DSEState &) = delete;
859 DSEState &operator=(const DSEState &) = delete;
860
861 DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
862 PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
863 const LoopInfo &LI)
864 : F(F), AA(AA), EI(DT, &LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
865 PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
866 // Collect blocks with throwing instructions not modeled in MemorySSA and
867 // alloc-like objects.
868 unsigned PO = 0;
869 for (BasicBlock *BB : post_order(&F)) {
870 PostOrderNumbers[BB] = PO++;
871 for (Instruction &I : *BB) {
872 MemoryAccess *MA = MSSA.getMemoryAccess(&I);
873 if (I.mayThrow() && !MA)
874 ThrowingBlocks.insert(I.getParent());
875
876 auto *MD = dyn_cast_or_null<MemoryDef>(MA);
877 if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
878 (getLocForWrite(&I) || isMemTerminatorInst(&I)))
879 MemDefs.push_back(MD);
880 }
881 }
882
883 // Treat byval or inalloca arguments the same as Allocas, stores to them are
884 // dead at the end of the function.
885 for (Argument &AI : F.args())
886 if (AI.hasPassPointeeByValueCopyAttr())
887 InvisibleToCallerAfterRet.insert({&AI, true});
888
889 // Collect whether there is any irreducible control flow in the function.
890 ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
891
892 AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
893 return isa<UnreachableInst>(E->getTerminator());
894 });
895 }
896
897 LocationSize strengthenLocationSize(const Instruction *I,
898 LocationSize Size) const {
899 if (auto *CB = dyn_cast<CallBase>(I)) {
900 LibFunc F;
901 if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
902 (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
903 // Use the precise location size specified by the 3rd argument
904 // for determining KillingI overwrites DeadLoc if it is a memset_chk
905 // instruction. memset_chk will write either the amount specified as 3rd
906 // argument or the function will immediately abort and exit the program.
907 // NOTE: AA may determine NoAlias if it can prove that the access size
908 // is larger than the allocation size due to that being UB. To avoid
909 // returning potentially invalid NoAlias results by AA, limit the use of
910 // the precise location size to isOverwrite.
911 if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
912 return LocationSize::precise(Len->getZExtValue());
913 }
914 }
915 return Size;
916 }
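
  // Illustrative example (added for exposition; not from the upstream
  // sources): for __memset_chk the third argument is the exact number of bytes
  // written (otherwise the call aborts), so the location size below can be
  // strengthened to a precise 32 bytes.
  //
  //   call ptr @__memset_chk(ptr %p, i32 0, i64 32, i64 %objsize)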
917
918 /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
919 /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
920 /// location (by \p DeadI instruction).
921 /// Return OW_MaybePartial if \p KillingI does not completely overwrite
922 /// \p DeadI, but they both write to the same underlying object. In that
923 /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
924 /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite the
925 /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
926 OverwriteResult isOverwrite(const Instruction *KillingI,
927 const Instruction *DeadI,
928 const MemoryLocation &KillingLoc,
929 const MemoryLocation &DeadLoc,
930 int64_t &KillingOff, int64_t &DeadOff) {
931 // AliasAnalysis does not always account for loops. Limit overwrite checks
932 // to dependencies for which we can guarantee they are independent of any
933 // loops they are in.
934 if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
935 return OW_Unknown;
936
937 LocationSize KillingLocSize =
938 strengthenLocationSize(KillingI, KillingLoc.Size);
939 const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
940 const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
941 const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
942 const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
943
944 // Check whether the killing store overwrites the whole object, in which
945 // case the size/offset of the dead store does not matter.
946 if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
947 isIdentifiedObject(KillingUndObj)) {
948 std::optional<TypeSize> KillingUndObjSize =
949 getPointerSize(KillingUndObj, DL, TLI, &F);
950 if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
951 return OW_Complete;
952 }
953
954 // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
955 // get imprecise values here, though (except for unknown sizes).
956 if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
957 // In case no constant size is known, try to use the IR values for the
958 // number of bytes written and check if they match.
959 const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
960 const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
961 if (KillingMemI && DeadMemI) {
962 const Value *KillingV = KillingMemI->getLength();
963 const Value *DeadV = DeadMemI->getLength();
964 if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
965 return OW_Complete;
966 }
967
968 // Masked stores have imprecise locations, but we can reason about them
969 // to some extent.
970 return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
971 }
972
973 const TypeSize KillingSize = KillingLocSize.getValue();
974 const TypeSize DeadSize = DeadLoc.Size.getValue();
975 // Bail on doing Size comparison which depends on AA for now
976 // TODO: Remove AnyScalable once Alias Analysis deals with scalable vectors
977 const bool AnyScalable =
978 DeadSize.isScalable() || KillingLocSize.isScalable();
979
980 if (AnyScalable)
981 return OW_Unknown;
982 // Query the alias information
983 AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
984
985 // If the start pointers are the same, we just have to compare sizes to see if
986 // the killing store was larger than the dead store.
987 if (AAR == AliasResult::MustAlias) {
988 // Make sure that the KillingSize size is >= the DeadSize size.
989 if (KillingSize >= DeadSize)
990 return OW_Complete;
991 }
992
993 // If we hit a partial alias we may have a full overwrite
994 if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
995 int32_t Off = AAR.getOffset();
996 if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
997 return OW_Complete;
998 }
999
1000 // If we can't resolve the same pointers to the same object, then we can't
1001 // analyze them at all.
1002 if (DeadUndObj != KillingUndObj) {
1003 // Non aliasing stores to different objects don't overlap. Note that
1004 // if the killing store is known to overwrite whole object (out of
1005 // bounds access overwrites whole object as well) then it is assumed to
1006 // completely overwrite any store to the same object even if they don't
1007 // actually alias (see next check).
1008 if (AAR == AliasResult::NoAlias)
1009 return OW_None;
1010 return OW_Unknown;
1011 }
1012
1013 // Okay, the two pointers refer to the same underlying object. Try to
1014 // decompose each pointer into a "base + constant_offset" form. If the base
1015 // pointers are equal, then we can reason about the two stores.
1016 DeadOff = 0;
1017 KillingOff = 0;
1018 const Value *DeadBasePtr =
1019 GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
1020 const Value *KillingBasePtr =
1021 GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
1022
1023 // If the base pointers still differ, we have two completely different
1024 // stores.
1025 if (DeadBasePtr != KillingBasePtr)
1026 return OW_Unknown;
1027
1028 // The killing access completely overlaps the dead store if and only if
1029 // both start and end of the dead one is "inside" the killing one:
1030 // |<->|--dead--|<->|
1031 // |-----killing------|
1032 // Accesses may overlap if and only if start of one of them is "inside"
1033 // another one:
1034 // |<->|--dead--|<-------->|
1035 // |-------killing--------|
1036 // OR
1037 // |-------dead-------|
1038 // |<->|---killing---|<----->|
1039 //
1040 // We have to be careful here as *Off is signed while *.Size is unsigned.
1041
1042 // Check if the dead access starts "not before" the killing one.
1043 if (DeadOff >= KillingOff) {
1044 // If the dead access ends "not after" the killing access then the
1045 // dead one is completely overwritten by the killing one.
1046 if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
1047 return OW_Complete;
1048 // If start of the dead access is "before" end of the killing access
1049 // then accesses overlap.
1050 else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
1051 return OW_MaybePartial;
1052 }
1053 // If start of the killing access is "before" end of the dead access then
1054 // accesses overlap.
1055 else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
1056 return OW_MaybePartial;
1057 }
1058
1059 // Can reach here only if accesses are known not to overlap.
1060 return OW_None;
1061 }
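
  // Illustrative example (added for exposition; not from the upstream
  // sources): both stores below decompose to the same base pointer with
  // DeadOff = 4, DeadSize = 4, KillingOff = 0, KillingSize = 8, so the killing
  // store completely covers the dead one and isOverwrite returns OW_Complete.
  //
  //   %f = getelementptr inbounds i8, ptr %base, i64 4
  //   store i32 1, ptr %f         ; dead
  //   store i64 0, ptr %base      ; killing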
1062
1063 bool isInvisibleToCallerAfterRet(const Value *V) {
1064 if (isa<AllocaInst>(V))
1065 return true;
1066 auto I = InvisibleToCallerAfterRet.insert({V, false});
1067 if (I.second) {
1068 if (!isInvisibleToCallerOnUnwind(V)) {
1069 I.first->second = false;
1070 } else if (isNoAliasCall(V)) {
1071 I.first->second = !PointerMayBeCaptured(V, true, false);
1072 }
1073 }
1074 return I.first->second;
1075 }
1076
1077 bool isInvisibleToCallerOnUnwind(const Value *V) {
1078 bool RequiresNoCaptureBeforeUnwind;
1079 if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
1080 return false;
1081 if (!RequiresNoCaptureBeforeUnwind)
1082 return true;
1083
1084 auto I = CapturedBeforeReturn.insert({V, true});
1085 if (I.second)
1086 // NOTE: This could be made more precise by PointerMayBeCapturedBefore
1087 // with the killing MemoryDef. But we refrain from doing so for now to
1088 // limit compile-time and this does not cause any changes to the number
1089 // of stores removed on a large test set in practice.
1090 I.first->second = PointerMayBeCaptured(V, false, true);
1091 return !I.first->second;
1092 }
1093
1094 std::optional<MemoryLocation> getLocForWrite(Instruction *I) const {
1095 if (!I->mayWriteToMemory())
1096 return std::nullopt;
1097
1098 if (auto *CB = dyn_cast<CallBase>(I))
1099 return MemoryLocation::getForDest(CB, TLI);
1100
1101 return MemoryLocation::getOrNone(I);
1102 }
1103
1104 /// Assuming this instruction has a dead analyzable write, can we delete
1105 /// this instruction?
1106 bool isRemovable(Instruction *I) {
1107 assert(getLocForWrite(I) && "Must have analyzable write");
1108
1109 // Don't remove volatile/atomic stores.
1110 if (StoreInst *SI = dyn_cast<StoreInst>(I))
1111 return SI->isUnordered();
1112
1113 if (auto *CB = dyn_cast<CallBase>(I)) {
1114 // Don't remove volatile memory intrinsics.
1115 if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1116 return !MI->isVolatile();
1117
1118 // Never remove dead lifetime intrinsics, e.g. because they are followed
1119 // by a free.
1120 if (CB->isLifetimeStartOrEnd())
1121 return false;
1122
1123 return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1124 !CB->isTerminator();
1125 }
1126
1127 return false;
1128 }
1129
1130 /// Returns true if \p UseInst completely overwrites \p DefLoc
1131 /// (stored by \p DefInst).
1132 bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1133 Instruction *UseInst) {
1134 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1135 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1136 // MemoryDef.
1137 if (!UseInst->mayWriteToMemory())
1138 return false;
1139
1140 if (auto *CB = dyn_cast<CallBase>(UseInst))
1141 if (CB->onlyAccessesInaccessibleMemory())
1142 return false;
1143
1144 int64_t InstWriteOffset, DepWriteOffset;
1145 if (auto CC = getLocForWrite(UseInst))
1146 return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1147 DepWriteOffset) == OW_Complete;
1148 return false;
1149 }
1150
1151 /// Returns true if \p Def is not read before returning from the function.
1152 bool isWriteAtEndOfFunction(MemoryDef *Def) {
1153 LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1154 << *Def->getMemoryInst()
1155 << ") is at the end of the function\n");
1156
1157 auto MaybeLoc = getLocForWrite(Def->getMemoryInst());
1158 if (!MaybeLoc) {
1159 LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
1160 return false;
1161 }
1162
1163 SmallVector<MemoryAccess *, 4> WorkList;
1164 SmallPtrSet<MemoryAccess *, 8> Visited;
1165 auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
1166 if (!Visited.insert(Acc).second)
1167 return;
1168 for (Use &U : Acc->uses())
1169 WorkList.push_back(cast<MemoryAccess>(U.getUser()));
1170 };
1171 PushMemUses(Def);
1172 for (unsigned I = 0; I < WorkList.size(); I++) {
1173 if (WorkList.size() >= MemorySSAScanLimit) {
1174 LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1175 return false;
1176 }
1177
1178 MemoryAccess *UseAccess = WorkList[I];
1179 if (isa<MemoryPhi>(UseAccess)) {
1180 // AliasAnalysis does not account for loops. Limit elimination to
1181 // candidates for which we can guarantee they always store to the same
1182 // memory location.
1183 if (!isGuaranteedLoopInvariant(MaybeLoc->Ptr))
1184 return false;
1185
1186 PushMemUses(cast<MemoryPhi>(UseAccess));
1187 continue;
1188 }
1189 // TODO: Checking for aliasing is expensive. Consider reducing the amount
1190 // of times this is called and/or caching it.
1191 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1192 if (isReadClobber(*MaybeLoc, UseInst)) {
1193 LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1194 return false;
1195 }
1196
1197 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1198 PushMemUses(UseDef);
1199 }
1200 return true;
1201 }
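
  // Illustrative example (added for exposition; not from the upstream
  // sources): the store below writes to a local alloca that is never read
  // before returning, so the write is dead at the end of the function.
  //
  //   define i32 @g() {
  //     %a = alloca i32
  //     store i32 42, ptr %a      ; dead: %a is not read before ret
  //     ret i32 0
  //   }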
1202
1203 /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1204 /// pair with the MemoryLocation terminated by \p I and a boolean flag
1205 /// indicating whether \p I is a free-like call.
1206 std::optional<std::pair<MemoryLocation, bool>>
1207 getLocForTerminator(Instruction *I) const {
1208 uint64_t Len;
1209 Value *Ptr;
1210 if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
1211 m_Value(Ptr))))
1212 return {std::make_pair(MemoryLocation(Ptr, Len), false)};
1213
1214 if (auto *CB = dyn_cast<CallBase>(I)) {
1215 if (Value *FreedOp = getFreedOperand(CB, &TLI))
1216 return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
1217 }
1218
1219 return std::nullopt;
1220 }
1221
1222 /// Returns true if \p I is a memory terminator instruction like
1223 /// llvm.lifetime.end or free.
1224 bool isMemTerminatorInst(Instruction *I) const {
1225 auto *CB = dyn_cast<CallBase>(I);
1226 return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1227 getFreedOperand(CB, &TLI) != nullptr);
1228 }
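
  // Illustrative example (added for exposition; not from the upstream
  // sources): a store whose underlying object ends its lifetime at the
  // matching lifetime.end (or is freed) is killed by that memory terminator.
  //
  //   %a = alloca [16 x i8]
  //   call void @llvm.lifetime.start.p0(i64 16, ptr %a)
  //   store i32 1, ptr %a                             ; dead
  //   call void @llvm.lifetime.end.p0(i64 16, ptr %a) ; memory terminator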
1229
1230 /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1231 /// instruction \p AccessI.
1232 bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1233 Instruction *MaybeTerm) {
1234 std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1235 getLocForTerminator(MaybeTerm);
1236
1237 if (!MaybeTermLoc)
1238 return false;
1239
1240 // If the terminator is a free-like call, all accesses to the underlying
1241 // object can be considered terminated.
1242 if (getUnderlyingObject(Loc.Ptr) !=
1243 getUnderlyingObject(MaybeTermLoc->first.Ptr))
1244 return false;
1245
1246 auto TermLoc = MaybeTermLoc->first;
1247 if (MaybeTermLoc->second) {
1248 const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1249 return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1250 }
1251 int64_t InstWriteOffset = 0;
1252 int64_t DepWriteOffset = 0;
1253 return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1254 DepWriteOffset) == OW_Complete;
1255 }
1256
1257 // Returns true if \p Use may read from \p DefLoc.
1258 bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1259 if (isNoopIntrinsic(UseInst))
1260 return false;
1261
1262 // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1263 // treated as read clobber.
1264 if (auto SI = dyn_cast<StoreInst>(UseInst))
1265 return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1266
1267 if (!UseInst->mayReadFromMemory())
1268 return false;
1269
1270 if (auto *CB = dyn_cast<CallBase>(UseInst))
1271 if (CB->onlyAccessesInaccessibleMemory())
1272 return false;
1273
1274 return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1275 }
1276
1277 /// Returns true if a dependency between \p Current and \p KillingDef is
1278 /// guaranteed to be loop invariant for the loops that they are in. Either
1279 /// because they are known to be in the same block, in the same loop level or
1280 /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1281 /// during execution of the containing function.
1282 bool isGuaranteedLoopIndependent(const Instruction *Current,
1283 const Instruction *KillingDef,
1284 const MemoryLocation &CurrentLoc) {
1285 // If the dependency is within the same block or loop level (being careful
1286 // of irreducible loops), we know that AA will return a valid result for the
1287 // memory dependency. (Both at the function level, outside of any loop,
1288 // would also be valid but we currently disable that to limit compile time).
1289 if (Current->getParent() == KillingDef->getParent())
1290 return true;
1291 const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
1292 if (!ContainsIrreducibleLoops && CurrentLI &&
1293 CurrentLI == LI.getLoopFor(KillingDef->getParent()))
1294 return true;
1295 // Otherwise check the memory location is invariant to any loops.
1296 return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1297 }
1298
1299 /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1300 /// loop. In particular, this guarantees that it only references a single
1301 /// MemoryLocation during execution of the containing function.
1302 bool isGuaranteedLoopInvariant(const Value *Ptr) {
1303 Ptr = Ptr->stripPointerCasts();
1304 if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1305 if (GEP->hasAllConstantIndices())
1306 Ptr = GEP->getPointerOperand()->stripPointerCasts();
1307
1308 if (auto *I = dyn_cast<Instruction>(Ptr)) {
1309 return I->getParent()->isEntryBlock() ||
1310 (!ContainsIrreducibleLoops && !LI.getLoopFor(I->getParent()));
1311 }
1312 return true;
1313 }
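
  // Illustrative example (added for exposition; not from the upstream
  // sources): %p below is guaranteed loop invariant (an alloca in the entry
  // block), while %q addresses a different location on every iteration and is
  // therefore not.
  //
  //   entry:
  //     %p = alloca i32
  //     br label %loop
  //   loop:
  //     %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  //     %q = getelementptr inbounds i32, ptr %buf, i64 %i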
1314
1315 // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1316 // with no read access between them or on any other path to a function exit
1317 // block if \p KillingLoc is not accessible after the function returns. If
1318 // there is no such MemoryDef, return std::nullopt. The returned value may not
1319 // (completely) overwrite \p KillingLoc. Currently we bail out when we
1320 // encounter an aliasing MemoryUse (read).
1321 std::optional<MemoryAccess *>
1322 getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1323 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1324 unsigned &ScanLimit, unsigned &WalkerStepLimit,
1325 bool IsMemTerm, unsigned &PartialLimit) {
1326 if (ScanLimit == 0 || WalkerStepLimit == 0) {
1327 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1328 return std::nullopt;
1329 }
1330
1331 MemoryAccess *Current = StartAccess;
1332 Instruction *KillingI = KillingDef->getMemoryInst();
1333 LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1334
1335 // Only optimize defining access of KillingDef when directly starting at its
1336 // defining access. The defining access also must only access KillingLoc. At
1337 // the moment we only support instructions with a single write location, so
1338 // it should be sufficient to disable optimizations for instructions that
1339 // also read from memory.
1340 bool CanOptimize = OptimizeMemorySSA &&
1341 KillingDef->getDefiningAccess() == StartAccess &&
1342 !KillingI->mayReadFromMemory();
1343
1344 // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1345 std::optional<MemoryLocation> CurrentLoc;
1346 for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1347 LLVM_DEBUG({
1348 dbgs() << " visiting " << *Current;
1349 if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1350 dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1351 << ")";
1352 dbgs() << "\n";
1353 });
1354
1355 // Reached TOP.
1356 if (MSSA.isLiveOnEntryDef(Current)) {
1357 LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1358 if (CanOptimize && Current != KillingDef->getDefiningAccess())
1359 // The first clobbering def is... none.
1360 KillingDef->setOptimized(Current);
1361 return std::nullopt;
1362 }
1363
1364 // Cost of a step. Accesses in the same block are more likely to be valid
1365 // candidates for elimination, hence consider them cheaper.
1366 unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1367 ? MemorySSASameBBStepCost
1368 : MemorySSAOtherBBStepCost;
1369 if (WalkerStepLimit <= StepCost) {
1370 LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1371 return std::nullopt;
1372 }
1373 WalkerStepLimit -= StepCost;
1374
1375 // Return for MemoryPhis. They cannot be eliminated directly and the
1376 // caller is responsible for traversing them.
1377 if (isa<MemoryPhi>(Current)) {
1378 LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1379 return Current;
1380 }
1381
1382 // Below, check if CurrentDef is a valid candidate to be eliminated by
1383 // KillingDef. If it is not, check the next candidate.
1384 MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1385 Instruction *CurrentI = CurrentDef->getMemoryInst();
1386
1387 if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
1388 CanOptimize = false;
1389 continue;
1390 }
1391
1392 // Before we try to remove anything, check for any extra throwing
1393 // instructions that block us from DSEing
1394 if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1395 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1396 return std::nullopt;
1397 }
1398
1399 // Check for anything that looks like it will be a barrier to further
1400 // removal
1401 if (isDSEBarrier(KillingUndObj, CurrentI)) {
1402 LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1403 return std::nullopt;
1404 }
1405
1406 // If Current is known to be on path that reads DefLoc or is a read
1407 // clobber, bail out, as the path is not profitable. We skip this check
1408 // for intrinsic calls, because the code knows how to handle memcpy
1409 // intrinsics.
1410 if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1411 return std::nullopt;
1412
1413 // Quick check if there are direct uses that are read-clobbers.
1414 if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1415 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1416 return !MSSA.dominates(StartAccess, UseOrDef) &&
1417 isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1418 return false;
1419 })) {
1420 LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1421 return std::nullopt;
1422 }
1423
1424 // If Current does not have an analyzable write location or is not
1425 // removable, skip it.
1426 CurrentLoc = getLocForWrite(CurrentI);
1427 if (!CurrentLoc || !isRemovable(CurrentI)) {
1428 CanOptimize = false;
1429 continue;
1430 }
1431
1432 // AliasAnalysis does not account for loops. Limit elimination to
1433 // candidates for which we can guarantee they always store to the same
1434 // memory location and not located in different loops.
1435 if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1436 LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1437 CanOptimize = false;
1438 continue;
1439 }
1440
1441 if (IsMemTerm) {
1442 // If the killing def is a memory terminator (e.g. lifetime.end), check
1443 // the next candidate if the current Current does not write the same
1444 // underlying object as the terminator.
1445 if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1446 CanOptimize = false;
1447 continue;
1448 }
1449 } else {
1450 int64_t KillingOffset = 0;
1451 int64_t DeadOffset = 0;
1452 auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1453 KillingOffset, DeadOffset);
1454 if (CanOptimize) {
1455 // CurrentDef is the earliest write clobber of KillingDef. Use it as
1456 // optimized access. Do not optimize if CurrentDef is already the
1457 // defining access of KillingDef.
1458 if (CurrentDef != KillingDef->getDefiningAccess() &&
1459 (OR == OW_Complete || OR == OW_MaybePartial))
1460 KillingDef->setOptimized(CurrentDef);
1461
1462 // Once a may-aliasing def is encountered do not set an optimized
1463 // access.
1464 if (OR != OW_None)
1465 CanOptimize = false;
1466 }
1467
1468 // If Current does not write to the same object as KillingDef, check
1469 // the next candidate.
1470 if (OR == OW_Unknown || OR == OW_None)
1471 continue;
1472 else if (OR == OW_MaybePartial) {
1473 // If KillingDef only partially overwrites Current, check the next
1474 // candidate if the partial step limit is exceeded. This aggressively
1475 // limits the number of candidates for partial store elimination,
1476 // which are less likely to be removable in the end.
1477 if (PartialLimit <= 1) {
1478 WalkerStepLimit -= 1;
1479 LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
1480 continue;
1481 }
1482 PartialLimit -= 1;
1483 }
1484 }
1485 break;
1486 };
1487
1488 // Accesses to objects accessible after the function returns can only be
1489 // eliminated if the access is dead along all paths to the exit. Collect
1490 // the blocks with killing (= completely overwriting) MemoryDefs and check if
1491 // they cover all paths from MaybeDeadAccess to any function exit.
1492 SmallPtrSet<Instruction *, 16> KillingDefs;
1493 KillingDefs.insert(KillingDef->getMemoryInst());
1494 MemoryAccess *MaybeDeadAccess = Current;
1495 MemoryLocation MaybeDeadLoc = *CurrentLoc;
1496 Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1497 LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1498 << *MaybeDeadI << ")\n");
1499
1500 SmallSetVector<MemoryAccess *, 32> WorkList;
1501 auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
1502 for (Use &U : Acc->uses())
1503 WorkList.insert(cast<MemoryAccess>(U.getUser()));
1504 };
1505 PushMemUses(MaybeDeadAccess);
1506
1507 // Check if DeadDef may be read.
1508 for (unsigned I = 0; I < WorkList.size(); I++) {
1509 MemoryAccess *UseAccess = WorkList[I];
1510
1511 LLVM_DEBUG(dbgs() << " " << *UseAccess);
1512 // Bail out if the number of accesses to check exceeds the scan limit.
1513 if (ScanLimit < (WorkList.size() - I)) {
1514 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1515 return std::nullopt;
1516 }
1517 --ScanLimit;
1518 NumDomMemDefChecks++;
1519
1520 if (isa<MemoryPhi>(UseAccess)) {
1521 if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1522 return DT.properlyDominates(KI->getParent(),
1523 UseAccess->getBlock());
1524 })) {
1525 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1526 continue;
1527 }
1528 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1529 PushMemUses(UseAccess);
1530 continue;
1531 }
1532
1533 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1534 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1535
1536 if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1537 return DT.dominates(KI, UseInst);
1538 })) {
1539 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1540 continue;
1541 }
1542
1543 // A memory terminator kills all preceding MemoryDefs and all succeeding
1544 // MemoryAccesses. We do not have to check its users.
1545 if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1546 LLVM_DEBUG(
1547 dbgs()
1548 << " ... skipping, memterminator invalidates following accesses\n");
1549 continue;
1550 }
1551
1552 if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1553 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1554 PushMemUses(UseAccess);
1555 continue;
1556 }
1557
1558 if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
1559 LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1560 return std::nullopt;
1561 }
1562
1563 // Uses which may read the original MemoryDef mean we cannot eliminate the
1564 // original MD. Stop walk.
1565 if (isReadClobber(MaybeDeadLoc, UseInst)) {
1566 LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1567 return std::nullopt;
1568 }
1569
1570 // If this worklist walks back to the original memory access (and the
1571 // pointer is not guaranteed loop invariant) then we cannot assume that a
1572 // store kills itself.
1573 if (MaybeDeadAccess == UseAccess &&
1574 !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1575 LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1576 return std::nullopt;
1577 }
1578 // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1579 // if it reads the memory location.
1580 // TODO: It would probably be better to check for self-reads before
1581 // calling the function.
1582 if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1583 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1584 continue;
1585 }
1586
1587 // Check all uses for MemoryDefs, except for defs completely overwriting
1588 // the original location. Otherwise we have to check uses of *all*
1589 // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
1590 // miss cases like the following
1591 // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1592 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1593 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1594 // (The Use points to the *first* Def it may alias)
1595 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1596 // stores [0,1]
1597 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1598 if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1599 BasicBlock *MaybeKillingBlock = UseInst->getParent();
1600 if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1601 PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1602 if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1604 << " ... found killing def " << *UseInst << "\n");
1605 KillingDefs.insert(UseInst);
1606 }
1607 } else {
1609 << " ... found preceeding def " << *UseInst << "\n");
1610 return std::nullopt;
1611 }
1612 } else
1613 PushMemUses(UseDef);
1614 }
1615 }
1616
1617 // For accesses to locations visible after the function returns, make sure
1618 // that the location is dead (=overwritten) along all paths from
1619 // MaybeDeadAccess to the exit.
1620 if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
1621 SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1622 for (Instruction *KD : KillingDefs)
1623 KillingBlocks.insert(KD->getParent());
1624 assert(!KillingBlocks.empty() &&
1625 "Expected at least a single killing block");
1626
1627 // Find the common post-dominator of all killing blocks.
1628 BasicBlock *CommonPred = *KillingBlocks.begin();
1629 for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1630 if (!CommonPred)
1631 break;
1632 CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1633 }
1634
1635 // If the common post-dominator does not post-dominate MaybeDeadAccess,
1636 // there is a path from MaybeDeadAccess to an exit not going through a
1637 // killing block.
1638 if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1639 if (!AnyUnreachableExit)
1640 return std::nullopt;
1641
1642 // Fall back to CFG scan starting at all non-unreachable roots if not
1643 // all paths to the exit go through CommonPred.
1644 CommonPred = nullptr;
1645 }
1646
1647 // If CommonPred itself is in the set of killing blocks, we're done.
1648 if (KillingBlocks.count(CommonPred))
1649 return {MaybeDeadAccess};
1650
1651 SetVector<BasicBlock *> WorkList;
1652 // If CommonPred is null, there are multiple exits from the function.
1653 // They all have to be added to the worklist.
1654 if (CommonPred)
1655 WorkList.insert(CommonPred);
1656 else
1657 for (BasicBlock *R : PDT.roots()) {
1658 if (!isa<UnreachableInst>(R->getTerminator()))
1659 WorkList.insert(R);
1660 }
1661
1662 NumCFGTries++;
1663 // Check if all paths starting from an exit node go through one of the
1664 // killing blocks before reaching MaybeDeadAccess.
1665 for (unsigned I = 0; I < WorkList.size(); I++) {
1666 NumCFGChecks++;
1667 BasicBlock *Current = WorkList[I];
1668 if (KillingBlocks.count(Current))
1669 continue;
1670 if (Current == MaybeDeadAccess->getBlock())
1671 return std::nullopt;
1672
1673 // MaybeDeadAccess is reachable from the entry, so we don't have to
1674 // explore unreachable blocks further.
1675 if (!DT.isReachableFromEntry(Current))
1676 continue;
1677
1678 for (BasicBlock *Pred : predecessors(Current))
1679 WorkList.insert(Pred);
1680
1681 if (WorkList.size() >= MemorySSAPathCheckLimit)
1682 return std::nullopt;
1683 }
1684 NumCFGSuccess++;
1685 }
1686
1687 // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1688 // potentially dead.
1689 return {MaybeDeadAccess};
1690 }
1691
1692 // Delete dead memory defs
1693 void deleteDeadInstruction(Instruction *SI) {
1694 MemorySSAUpdater Updater(&MSSA);
1695 SmallVector<Instruction *, 32> NowDeadInsts;
1696 NowDeadInsts.push_back(SI);
1697 --NumFastOther;
1698
1699 while (!NowDeadInsts.empty()) {
1700 Instruction *DeadInst = NowDeadInsts.pop_back_val();
1701 ++NumFastOther;
1702
1703 // Try to preserve debug information attached to the dead instruction.
1704 salvageDebugInfo(*DeadInst);
1705 salvageKnowledge(DeadInst);
1706
1707 // Remove the Instruction from MSSA.
1708 if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
1709 if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
1710 SkipStores.insert(MD);
1711 if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
1712 if (SI->getValueOperand()->getType()->isPointerTy()) {
1713 const Value *UO = getUnderlyingObject(SI->getValueOperand());
1714 if (CapturedBeforeReturn.erase(UO))
1715 ShouldIterateEndOfFunctionDSE = true;
1716 InvisibleToCallerAfterRet.erase(UO);
1717 }
1718 }
1719 }
1720
1721 Updater.removeMemoryAccess(MA);
1722 }
1723
1724 auto I = IOLs.find(DeadInst->getParent());
1725 if (I != IOLs.end())
1726 I->second.erase(DeadInst);
1727 // Remove its operands
1728 for (Use &O : DeadInst->operands())
1729 if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1730 O = nullptr;
1731 if (isInstructionTriviallyDead(OpI, &TLI))
1732 NowDeadInsts.push_back(OpI);
1733 }
1734
1735 EI.removeInstruction(DeadInst);
1736 DeadInst->eraseFromParent();
1737 }
1738 }
1739
1740 // Check for any extra throws between \p KillingI and \p DeadI that block
1741 // DSE. This only checks extra maythrows (those that aren't MemoryDef's).
1742 // MemoryDef that may throw are handled during the walk from one def to the
1743 // next.
1744 bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1745 const Value *KillingUndObj) {
1746 // First see if we can ignore it by using the fact that the killing store's
1747 // underlying object is an alloca/alloca-like object that is not visible to
1748 // the caller during execution of the function.
1749 if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
1750 return false;
1751
1752 if (KillingI->getParent() == DeadI->getParent())
1753 return ThrowingBlocks.count(KillingI->getParent());
1754 return !ThrowingBlocks.empty();
1755 }
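// Illustrative sketch (hypothetical IR): the throw checks above exist because
// an exceptional exit may observe an "overwritten" store when the object is
// visible to the caller:
//   store i32 1, ptr @g            ; cannot be removed
//   call void @may_unwind()        ; if this unwinds, @g must still hold 1
//   store i32 2, ptr @g
// If @g were a non-escaping alloca instead, the unwind path could not observe
// it and the first store would be removable.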
1756
1757 // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1758 // instructions act as barriers:
1759 // * A memory instruction that may throw and \p KillingI accesses a non-stack
1760 // object.
1761 // * Atomic stores stronger than monotonic.
1762 bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
1763 // If DeadI may throw it acts as a barrier, unless the accessed object is
1764 // an alloca/alloca-like object that does not escape.
1765 if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
1766 return true;
1767
1768 // If DeadI is an atomic load/store stronger than monotonic, do not try to
1769 // eliminate/reorder it.
1770 if (DeadI->isAtomic()) {
1771 if (auto *LI = dyn_cast<LoadInst>(DeadI))
1772 return isStrongerThanMonotonic(LI->getOrdering());
1773 if (auto *SI = dyn_cast<StoreInst>(DeadI))
1774 return isStrongerThanMonotonic(SI->getOrdering());
1775 if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
1776 return isStrongerThanMonotonic(ARMW->getOrdering());
1777 if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
1778 return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
1779 isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
1780 llvm_unreachable("other instructions should be skipped in MemorySSA");
1781 }
1782 return false;
1783 }
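// Illustrative sketch (hypothetical IR): an atomic store stronger than
// monotonic between two stores to %p acts as a barrier, so the first store to
// %p is conservatively kept even though it is later overwritten:
//   store i32 0, ptr %p
//   store atomic i32 1, ptr %q release, align 4   ; DSE barrier
//   store i32 2, ptr %p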
1784
1785 /// Eliminate writes to objects that are not visible in the caller and are not
1786 /// accessed before returning from the function.
1787 bool eliminateDeadWritesAtEndOfFunction() {
1788 bool MadeChange = false;
1789 LLVM_DEBUG(
1790 dbgs()
1791 << "Trying to eliminate MemoryDefs at the end of the function\n");
1792 do {
1793 ShouldIterateEndOfFunctionDSE = false;
1794 for (MemoryDef *Def : llvm::reverse(MemDefs)) {
1795 if (SkipStores.contains(Def))
1796 continue;
1797
1798 Instruction *DefI = Def->getMemoryInst();
1799 auto DefLoc = getLocForWrite(DefI);
1800 if (!DefLoc || !isRemovable(DefI))
1801 continue;
1802
1803 // NOTE: Currently eliminating writes at the end of a function is
1804 // limited to MemoryDefs with a single underlying object, to save
1805 // compile-time. In practice it appears the case with multiple
1806 // underlying objects is very uncommon. If it turns out to be important,
1807 // we can use getUnderlyingObjects here instead.
1808 const Value *UO = getUnderlyingObject(DefLoc->Ptr);
1809 if (!isInvisibleToCallerAfterRet(UO))
1810 continue;
1811
1812 if (isWriteAtEndOfFunction(Def)) {
1813 // See through pointer-to-pointer bitcasts
1814 LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
1815 "of the function\n");
1816 deleteDeadInstruction(DefI);
1817 ++NumFastStores;
1818 MadeChange = true;
1819 }
1820 }
1821 } while (ShouldIterateEndOfFunctionDSE);
1822 return MadeChange;
1823 }
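// Illustrative sketch (hypothetical IR) of the end-of-function case handled
// above: the store targets a non-escaping alloca and nothing reads it before
// returning, so it is deleted:
//   %a = alloca i32
//   store i32 42, ptr %a    ; dead, %a is invisible to the caller after ret
//   ret void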
1824
1825 /// If we have a zero-initializing memset following a call to malloc,
1826 /// try folding it into a call to calloc.
1827 bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
1828 Instruction *DefI = Def->getMemoryInst();
1829 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1830 if (!MemSet)
1831 // TODO: Could handle zero store to small allocation as well.
1832 return false;
1833 Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1834 if (!StoredConstant || !StoredConstant->isNullValue())
1835 return false;
1836
1837 if (!isRemovable(DefI))
1838 // The memset might be volatile.
1839 return false;
1840
1841 if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
1842 F.hasFnAttribute(Attribute::SanitizeAddress) ||
1843 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
1844 F.getName() == "calloc")
1845 return false;
1846 auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
1847 if (!Malloc)
1848 return false;
1849 auto *InnerCallee = Malloc->getCalledFunction();
1850 if (!InnerCallee)
1851 return false;
1852 LibFunc Func;
1853 if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
1854 Func != LibFunc_malloc)
1855 return false;
1856 // Gracefully handle malloc with unexpected memory attributes.
1857 auto *MallocDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(Malloc));
1858 if (!MallocDef)
1859 return false;
1860
1861 auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
1862 // Check for the br(icmp(ptr, null), truebb, falsebb) pattern at the end
1863 // of the malloc block.
1864 auto *MallocBB = Malloc->getParent(),
1865 *MemsetBB = Memset->getParent();
1866 if (MallocBB == MemsetBB)
1867 return true;
1868 auto *Ptr = Memset->getArgOperand(0);
1869 auto *TI = MallocBB->getTerminator();
1870 ICmpInst::Predicate Pred;
1871 BasicBlock *TrueBB, *FalseBB;
1872 if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
1873 FalseBB)))
1874 return false;
1875 if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB)
1876 return false;
1877 return true;
1878 };
1879
1880 if (Malloc->getOperand(0) != MemSet->getLength())
1881 return false;
1882 if (!shouldCreateCalloc(Malloc, MemSet) ||
1883 !DT.dominates(Malloc, MemSet) ||
1884 !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
1885 return false;
1886 IRBuilder<> IRB(Malloc);
1887 Type *SizeTTy = Malloc->getArgOperand(0)->getType();
1888 auto *Calloc = emitCalloc(ConstantInt::get(SizeTTy, 1),
1889 Malloc->getArgOperand(0), IRB, TLI);
1890 if (!Calloc)
1891 return false;
1892 MemorySSAUpdater Updater(&MSSA);
1893 auto *NewAccess =
1894 Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), nullptr,
1895 MallocDef);
1896 auto *NewAccessMD = cast<MemoryDef>(NewAccess);
1897 Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
1898 Updater.removeMemoryAccess(Malloc);
1899 Malloc->replaceAllUsesWith(Calloc);
1900 Malloc->eraseFromParent();
1901 return true;
1902 }
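// Illustrative sketch (hypothetical IR) of the fold above, assuming the memset
// length equals the malloc size and nothing writes the buffer in between:
//   %p = call ptr @malloc(i64 %n)
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
// becomes
//   %p = call ptr @calloc(i64 1, i64 %n)
// with the memset then removed by the caller of this function.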
1903
1904 /// \returns true if \p Def is a no-op store, either because it
1905 /// directly stores back a loaded value or stores zero to a calloced object.
1906 bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
1907 Instruction *DefI = Def->getMemoryInst();
1908 StoreInst *Store = dyn_cast<StoreInst>(DefI);
1909 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
1910 Constant *StoredConstant = nullptr;
1911 if (Store)
1912 StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
1913 else if (MemSet)
1914 StoredConstant = dyn_cast<Constant>(MemSet->getValue());
1915 else
1916 return false;
1917
1918 if (!isRemovable(DefI))
1919 return false;
1920
1921 if (StoredConstant) {
1922 Constant *InitC =
1923 getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
1924 // If the clobbering access is LiveOnEntry, no instructions between them
1925 // can modify the memory location.
1926 if (InitC && InitC == StoredConstant)
1927 return MSSA.isLiveOnEntryDef(
1928 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
1929 }
1930
1931 if (!Store)
1932 return false;
1933
1934 if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
1935 if (LoadI->getPointerOperand() == Store->getOperand(1)) {
1936 // Get the defining access for the load.
1937 auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
1938 // Fast path: the defining accesses are the same.
1939 if (LoadAccess == Def->getDefiningAccess())
1940 return true;
1941
1942 // Look through phi accesses. Recursively scan all phi accesses by
1943 // adding them to a worklist. Bail when we run into a memory def that
1944 // does not match LoadAccess.
1945 SetVector<MemoryAccess *> ToCheck;
1946 MemoryAccess *Current =
1947 MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
1948 // We don't want to bail when we run into the store memory def. But,
1949 // the phi access may point to it. So, pretend like we've already
1950 // checked it.
1951 ToCheck.insert(Def);
1952 ToCheck.insert(Current);
1953 // Start at current (1) to simulate already having checked Def.
1954 for (unsigned I = 1; I < ToCheck.size(); ++I) {
1955 Current = ToCheck[I];
1956 if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
1957 // Check all the operands.
1958 for (auto &Use : PhiAccess->incoming_values())
1959 ToCheck.insert(cast<MemoryAccess>(&Use));
1960 continue;
1961 }
1962
1963 // If we found a memory def, bail. This happens when we have an
1964 // unrelated write in between an otherwise noop store.
1965 assert(isa<MemoryDef>(Current) &&
1966 "Only MemoryDefs should reach here.");
1967 // TODO: Skip no alias MemoryDefs that have no aliasing reads.
1968 // We are searching for the definition of the store's destination.
1969 // So, if that is the same definition as the load, then this is a
1970 // noop. Otherwise, fail.
1971 if (LoadAccess != Current)
1972 return false;
1973 }
1974 return true;
1975 }
1976 }
1977
1978 return false;
1979 }
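// Illustrative sketch (hypothetical IR) of a no-op store recognized above:
//   %v = load i32, ptr %p
//   store i32 %v, ptr %p    ; removable if nothing clobbers %p in between
// The constant case works the same way when the stored value matches the
// allocation's initial value (e.g. storing zero into a calloc'ed buffer) and
// the clobbering access is LiveOnEntry.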
1980
1981 bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
1982 bool Changed = false;
1983 for (auto OI : IOL) {
1984 Instruction *DeadI = OI.first;
1985 MemoryLocation Loc = *getLocForWrite(DeadI);
1986 assert(isRemovable(DeadI) && "Expect only removable instruction");
1987
1988 const Value *Ptr = Loc.Ptr->stripPointerCasts();
1989 int64_t DeadStart = 0;
1990 uint64_t DeadSize = Loc.Size.getValue();
1991 GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
1992 OverlapIntervalsTy &IntervalMap = OI.second;
1993 Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
1994 if (IntervalMap.empty())
1995 continue;
1996 Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
1997 }
1998 return Changed;
1999 }
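// Illustrative sketch (hypothetical IR) of the shortening performed above: a
// later store fully overwrites the tail of an earlier memset, so the memset is
// shrunk instead of removed:
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 32, i1 false)
//   store i64 1, ptr %q     ; %q = %p + 24, overwrites bytes [24, 32)
// The memset is rewritten to cover only the first 24 bytes.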
2000
2001 /// Eliminates writes to locations where the value that is being written
2002 /// is already stored at the same location.
2003 bool eliminateRedundantStoresOfExistingValues() {
2004 bool MadeChange = false;
2005 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
2006 "already existing value\n");
2007 for (auto *Def : MemDefs) {
2008 if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
2009 continue;
2010
2011 Instruction *DefInst = Def->getMemoryInst();
2012 auto MaybeDefLoc = getLocForWrite(DefInst);
2013 if (!MaybeDefLoc || !isRemovable(DefInst))
2014 continue;
2015
2016 MemoryDef *UpperDef;
2017 // To conserve compile-time, we avoid walking to the next clobbering def.
2018 // Instead, we just try to get the optimized access, if it exists. DSE
2019 // will try to optimize defs during the earlier traversal.
2020 if (Def->isOptimized())
2021 UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
2022 else
2023 UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
2024 if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
2025 continue;
2026
2027 Instruction *UpperInst = UpperDef->getMemoryInst();
2028 auto IsRedundantStore = [&]() {
2029 if (DefInst->isIdenticalTo(UpperInst))
2030 return true;
2031 if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
2032 if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
2033 // MemSetInst must have a write location.
2034 MemoryLocation UpperLoc = *getLocForWrite(UpperInst);
2035 int64_t InstWriteOffset = 0;
2036 int64_t DepWriteOffset = 0;
2037 auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
2038 InstWriteOffset, DepWriteOffset);
2039 Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
2040 return StoredByte && StoredByte == MemSetI->getOperand(1) &&
2041 OR == OW_Complete;
2042 }
2043 }
2044 return false;
2045 };
2046
2047 if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
2048 continue;
2049 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2050 << '\n');
2051 deleteDeadInstruction(DefInst);
2052 NumRedundantStores++;
2053 MadeChange = true;
2054 }
2055 return MadeChange;
2056 }
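// Illustrative sketch (hypothetical IR) of a store removed above because the
// location already holds the stored value:
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 16, i1 false)
//   store i32 0, ptr %p     ; redundant, the bytes are already zero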
2057};
2058
2059static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
2060 DominatorTree &DT, PostDominatorTree &PDT,
2061 const TargetLibraryInfo &TLI,
2062 const LoopInfo &LI) {
2063 bool MadeChange = false;
2064
2065 DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
2066 // For each store:
2067 for (unsigned I = 0; I < State.MemDefs.size(); I++) {
2068 MemoryDef *KillingDef = State.MemDefs[I];
2069 if (State.SkipStores.count(KillingDef))
2070 continue;
2071 Instruction *KillingI = KillingDef->getMemoryInst();
2072
2073 std::optional<MemoryLocation> MaybeKillingLoc;
2074 if (State.isMemTerminatorInst(KillingI)) {
2075 if (auto KillingLoc = State.getLocForTerminator(KillingI))
2076 MaybeKillingLoc = KillingLoc->first;
2077 } else {
2078 MaybeKillingLoc = State.getLocForWrite(KillingI);
2079 }
2080
2081 if (!MaybeKillingLoc) {
2082 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2083 << *KillingI << "\n");
2084 continue;
2085 }
2086 MemoryLocation KillingLoc = *MaybeKillingLoc;
2087 assert(KillingLoc.Ptr && "KillingLoc should not be null");
2088 const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
2089 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2090 << *KillingDef << " (" << *KillingI << ")\n");
2091
2092 unsigned ScanLimit = MemorySSAScanLimit;
2093 unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2094 unsigned PartialLimit = MemorySSAPartialStoreLimit;
2095 // Worklist of MemoryAccesses that may be killed by KillingDef.
2096 SetVector<MemoryAccess *> ToCheck;
2097 ToCheck.insert(KillingDef->getDefiningAccess());
2098
2099 bool Shortend = false;
2100 bool IsMemTerm = State.isMemTerminatorInst(KillingI);
2101 // Check if MemoryAccesses in the worklist are killed by KillingDef.
2102 for (unsigned I = 0; I < ToCheck.size(); I++) {
2103 MemoryAccess *Current = ToCheck[I];
2104 if (State.SkipStores.count(Current))
2105 continue;
2106
2107 std::optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
2108 KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
2109 WalkerStepLimit, IsMemTerm, PartialLimit);
2110
2111 if (!MaybeDeadAccess) {
2112 LLVM_DEBUG(dbgs() << " finished walk\n");
2113 continue;
2114 }
2115
2116 MemoryAccess *DeadAccess = *MaybeDeadAccess;
2117 LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2118 if (isa<MemoryPhi>(DeadAccess)) {
2119 LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2120 for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
2121 MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
2122 BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2123 BasicBlock *PhiBlock = DeadAccess->getBlock();
2124
2125 // We only consider incoming MemoryAccesses that come before the
2126 // MemoryPhi. Otherwise we could discover candidates that do not
2127 // strictly dominate our starting def.
2128 if (State.PostOrderNumbers[IncomingBlock] >
2129 State.PostOrderNumbers[PhiBlock])
2130 ToCheck.insert(IncomingAccess);
2131 }
2132 continue;
2133 }
2134 auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
2135 Instruction *DeadI = DeadDefAccess->getMemoryInst();
2136 LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
2137 ToCheck.insert(DeadDefAccess->getDefiningAccess());
2138 NumGetDomMemoryDefPassed++;
2139
2140 if (!DebugCounter::shouldExecute(MemorySSACounter))
2141 continue;
2142
2143 MemoryLocation DeadLoc = *State.getLocForWrite(DeadI);
2144
2145 if (IsMemTerm) {
2146 const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
2147 if (KillingUndObj != DeadUndObj)
2148 continue;
2149 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2150 << "\n KILLER: " << *KillingI << '\n');
2151 State.deleteDeadInstruction(DeadI);
2152 ++NumFastStores;
2153 MadeChange = true;
2154 } else {
2155 // Check if DeadI overwrites KillingI.
2156 int64_t KillingOffset = 0;
2157 int64_t DeadOffset = 0;
2158 OverwriteResult OR = State.isOverwrite(
2159 KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
2160 if (OR == OW_MaybePartial) {
2161 auto Iter = State.IOLs.insert(
2162 std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
2163 DeadI->getParent(), InstOverlapIntervalsTy()));
2164 auto &IOL = Iter.first->second;
2165 OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
2166 DeadOffset, DeadI, IOL);
2167 }
2168
2169 if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2170 auto *DeadSI = dyn_cast<StoreInst>(DeadI);
2171 auto *KillingSI = dyn_cast<StoreInst>(KillingI);
2172 // We are re-using tryToMergePartialOverlappingStores, which requires
2173 // DeadSI to dominate KillingSI.
2174 // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
2175 if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
2176 if (Constant *Merged = tryToMergePartialOverlappingStores(
2177 KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
2178 State.BatchAA, &DT)) {
2179
2180 // Update stored value of earlier store to merged constant.
2181 DeadSI->setOperand(0, Merged);
2182 ++NumModifiedStores;
2183 MadeChange = true;
2184
2185 Shortend = true;
2186 // Remove killing store and remove any outstanding overlap
2187 // intervals for the updated store.
2188 State.deleteDeadInstruction(KillingSI);
2189 auto I = State.IOLs.find(DeadSI->getParent());
2190 if (I != State.IOLs.end())
2191 I->second.erase(DeadSI);
2192 break;
2193 }
2194 }
2195 }
2196
2197 if (OR == OW_Complete) {
2198 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2199 << "\n KILLER: " << *KillingI << '\n');
2200 State.deleteDeadInstruction(DeadI);
2201 ++NumFastStores;
2202 MadeChange = true;
2203 }
2204 }
2205 }
2206
2207 // Check if the store is a no-op.
2208 if (!Shortend && State.storeIsNoop(KillingDef, KillingUndObj)) {
2209 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
2210 << '\n');
2211 State.deleteDeadInstruction(KillingI);
2212 NumRedundantStores++;
2213 MadeChange = true;
2214 continue;
2215 }
2216
2217 // Can we form a calloc from a memset/malloc pair?
2218 if (!Shortend && State.tryFoldIntoCalloc(KillingDef, KillingUndObj)) {
2219 LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2220 << " DEAD: " << *KillingI << '\n');
2221 State.deleteDeadInstruction(KillingI);
2222 MadeChange = true;
2223 continue;
2224 }
2225 }
2226
2227 if (EnablePartialOverwriteTracking)
2228 for (auto &KV : State.IOLs)
2229 MadeChange |= State.removePartiallyOverlappedStores(KV.second);
2230
2231 MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2232 MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2233 return MadeChange;
2234}
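// Illustrative sketch (hypothetical IR) of the common case driven by the loop
// above: the earlier store is completely overwritten with no intervening read
// or barrier, so it is deleted:
//   store i32 1, ptr %p     ; dead
//   store i32 2, ptr %p     ; killing store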
2235} // end anonymous namespace
2236
2237//===----------------------------------------------------------------------===//
2238// DSE Pass
2239//===----------------------------------------------------------------------===//
2240 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2241 AliasAnalysis &AA = AM.getResult<AAManager>(F);
2242 const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2243 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
2244 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2245 PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
2246 LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
2247
2248 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2249
2250#ifdef LLVM_ENABLE_STATS
2251 if (AreStatisticsEnabled())
2252 for (auto &I : instructions(F))
2253 NumRemainingStores += isa<StoreInst>(&I);
2254#endif
2255
2256 if (!Changed)
2257 return PreservedAnalyses::all();
2258
2259 PreservedAnalyses PA;
2260 PA.preserveSet<CFGAnalyses>();
2261 PA.preserve<MemorySSAAnalysis>();
2262 PA.preserve<LoopAnalysis>();
2263 return PA;
2264}