LLVM 23.0.0git
DeadStoreElimination.cpp
Go to the documentation of this file.
1//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The code below implements dead store elimination using MemorySSA. It uses
10// the following general approach: given a MemoryDef, walk upwards to find
11// clobbering MemoryDefs that may be killed by the starting def. Then check
12// that there are no uses that may read the location of the original MemoryDef
13// in between both MemoryDefs. A bit more concretely:
14//
15// For all MemoryDefs StartDef:
16// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
17// upwards.
18// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
19// checking all uses starting at MaybeDeadAccess and walking until we see
20// StartDef.
21// 3. For each found CurrentDef, check that:
22// 1. There are no barrier instructions between CurrentDef and StartDef (like
23// throws or stores with ordering constraints).
24// 2. StartDef is executed whenever CurrentDef is executed.
25// 3. StartDef completely overwrites CurrentDef.
26// 4. Erase CurrentDef from the function and MemorySSA.
27//
28//===----------------------------------------------------------------------===//
29
31#include "llvm/ADT/APInt.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/MapVector.h"
36#include "llvm/ADT/SetVector.h"
39#include "llvm/ADT/Statistic.h"
40#include "llvm/ADT/StringRef.h"
46#include "llvm/Analysis/Loads.h"
55#include "llvm/IR/Argument.h"
57#include "llvm/IR/BasicBlock.h"
58#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugInfo.h"
63#include "llvm/IR/Dominators.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/InstrTypes.h"
68#include "llvm/IR/Instruction.h"
71#include "llvm/IR/Module.h"
72#include "llvm/IR/PassManager.h"
74#include "llvm/IR/Value.h"
78#include "llvm/Support/Debug.h"
86#include <algorithm>
87#include <cassert>
88#include <cstdint>
89#include <map>
90#include <optional>
91#include <utility>
92
93using namespace llvm;
94using namespace PatternMatch;
95
96#define DEBUG_TYPE "dse"
97
98STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
99STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
100STATISTIC(NumFastStores, "Number of stores deleted");
101STATISTIC(NumFastOther, "Number of other instrs removed");
102STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
103STATISTIC(NumModifiedStores, "Number of stores modified");
104STATISTIC(NumCFGChecks, "Number of stores modified");
105STATISTIC(NumCFGTries, "Number of stores modified");
106STATISTIC(NumCFGSuccess, "Number of stores modified");
107STATISTIC(NumGetDomMemoryDefPassed,
108 "Number of times a valid candidate is returned from getDomMemoryDef");
109STATISTIC(NumDomMemDefChecks,
110 "Number iterations check for reads in getDomMemoryDef");
111
112DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
113 "Controls which MemoryDefs are eliminated.");
114
115static cl::opt<bool>
116EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
117 cl::init(true), cl::Hidden,
118 cl::desc("Enable partial-overwrite tracking in DSE"));
119
120static cl::opt<bool>
121EnablePartialStoreMerging("enable-dse-partial-store-merging",
122 cl::init(true), cl::Hidden,
123 cl::desc("Enable partial store merging in DSE"));
124
126 MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
127 cl::desc("The number of memory instructions to scan for "
128 "dead store elimination (default = 150)"));
130 "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
131 cl::desc("The maximum number of steps while walking upwards to find "
132 "MemoryDefs that may be killed (default = 90)"));
133
135 "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
136 cl::desc("The maximum number candidates that only partially overwrite the "
137 "killing MemoryDef to consider"
138 " (default = 5)"));
139
141 "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
142 cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
143 "other stores per basic block (default = 5000)"));
144
146 "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
147 cl::desc(
148 "The cost of a step in the same basic block as the killing MemoryDef"
149 "(default = 1)"));
150
152 MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
154 cl::desc("The cost of a step in a different basic "
155 "block than the killing MemoryDef"
156 "(default = 5)"));
157
159 "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
160 cl::desc("The maximum number of blocks to check when trying to prove that "
161 "all paths to an exit go through a killing block (default = 50)"));
162
163// This flags allows or disallows DSE to optimize MemorySSA during its
164// traversal. Note that DSE optimizing MemorySSA may impact other passes
165// downstream of the DSE invocation and can lead to issues not being
166// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
167// those cases, the flag can be used to check if DSE's MemorySSA optimizations
168// impact follow-up passes.
169static cl::opt<bool>
170 OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
171 cl::desc("Allow DSE to optimize memory accesses."));
172
173// TODO: remove this flag.
175 "enable-dse-initializes-attr-improvement", cl::init(true), cl::Hidden,
176 cl::desc("Enable the initializes attr improvement in DSE"));
177
179 "dse-max-dom-cond-depth", cl::init(1024), cl::Hidden,
180 cl::desc("Max dominator tree recursion depth for eliminating redundant "
181 "stores via dominating conditions"));
182
183//===----------------------------------------------------------------------===//
184// Helper functions
185//===----------------------------------------------------------------------===//
186using OverlapIntervalsTy = std::map<int64_t, int64_t>;
188
189/// Returns true if the end of this instruction can be safely shortened in
190/// length.
192 // Don't shorten stores for now
193 if (isa<StoreInst>(I))
194 return false;
195
197 switch (II->getIntrinsicID()) {
198 default: return false;
199 case Intrinsic::memset:
200 case Intrinsic::memcpy:
201 case Intrinsic::memcpy_element_unordered_atomic:
202 case Intrinsic::memset_element_unordered_atomic:
203 // Do shorten memory intrinsics.
204 // FIXME: Add memmove if it's also safe to transform.
205 return true;
206 }
207 }
208
209 // Don't shorten libcalls calls for now.
210
211 return false;
212}
213
214/// Returns true if the beginning of this instruction can be safely shortened
215/// in length.
217 // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
218 // easily done by offsetting the source address.
219 return isa<AnyMemSetInst>(I);
220}
221
222static std::optional<TypeSize> getPointerSize(const Value *V,
223 const DataLayout &DL,
224 const TargetLibraryInfo &TLI,
225 const Function *F) {
227 ObjectSizeOpts Opts;
229
230 if (getObjectSize(V, Size, DL, &TLI, Opts))
231 return TypeSize::getFixed(Size);
232 return std::nullopt;
233}
234
235namespace {
236
237enum OverwriteResult {
238 OW_Begin,
239 OW_Complete,
240 OW_End,
241 OW_PartialEarlierWithFullLater,
242 OW_MaybePartial,
243 OW_None,
244 OW_Unknown
245};
246
247} // end anonymous namespace
248
249/// Check if two instruction are masked stores that completely
250/// overwrite one another. More specifically, \p KillingI has to
251/// overwrite \p DeadI.
252static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
253 const Instruction *DeadI,
255 const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
256 const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
257 if (KillingII == nullptr || DeadII == nullptr)
258 return OW_Unknown;
259 if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
260 return OW_Unknown;
261
262 switch (KillingII->getIntrinsicID()) {
263 case Intrinsic::masked_store:
264 case Intrinsic::vp_store: {
265 const DataLayout &DL = KillingII->getDataLayout();
266 auto *KillingTy = KillingII->getArgOperand(0)->getType();
267 auto *DeadTy = DeadII->getArgOperand(0)->getType();
268 if (DL.getTypeSizeInBits(KillingTy) != DL.getTypeSizeInBits(DeadTy))
269 return OW_Unknown;
270 // Element count.
271 if (cast<VectorType>(KillingTy)->getElementCount() !=
272 cast<VectorType>(DeadTy)->getElementCount())
273 return OW_Unknown;
274 // Pointers.
275 Value *KillingPtr = KillingII->getArgOperand(1);
276 Value *DeadPtr = DeadII->getArgOperand(1);
277 if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
278 return OW_Unknown;
279 if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
280 // Masks.
281 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
282 if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
283 return OW_Unknown;
284 } else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
285 // Masks.
286 // TODO: check that KillingII's mask is a superset of the DeadII's mask.
287 if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
288 return OW_Unknown;
289 // Lengths.
290 if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
291 return OW_Unknown;
292 }
293 return OW_Complete;
294 }
295 default:
296 return OW_Unknown;
297 }
298}
299
300/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
301/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
302/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
303/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
304/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
305/// overwritten by a killing (smaller) store which doesn't write outside the big
306/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
307/// NOTE: This function must only be called if both \p KillingLoc and \p
308/// DeadLoc belong to the same underlying object with valid \p KillingOff and
309/// \p DeadOff.
310static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
311 const MemoryLocation &DeadLoc,
312 int64_t KillingOff, int64_t DeadOff,
313 Instruction *DeadI,
315 const uint64_t KillingSize = KillingLoc.Size.getValue();
316 const uint64_t DeadSize = DeadLoc.Size.getValue();
317 // We may now overlap, although the overlap is not complete. There might also
318 // be other incomplete overlaps, and together, they might cover the complete
319 // dead store.
320 // Note: The correctness of this logic depends on the fact that this function
321 // is not even called providing DepWrite when there are any intervening reads.
323 KillingOff < int64_t(DeadOff + DeadSize) &&
324 int64_t(KillingOff + KillingSize) >= DeadOff) {
325
326 // Insert our part of the overlap into the map.
327 auto &IM = IOL[DeadI];
328 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
329 << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
330 << KillingOff << ", " << int64_t(KillingOff + KillingSize)
331 << ")\n");
332
333 // Make sure that we only insert non-overlapping intervals and combine
334 // adjacent intervals. The intervals are stored in the map with the ending
335 // offset as the key (in the half-open sense) and the starting offset as
336 // the value.
337 int64_t KillingIntStart = KillingOff;
338 int64_t KillingIntEnd = KillingOff + KillingSize;
339
340 // Find any intervals ending at, or after, KillingIntStart which start
341 // before KillingIntEnd.
342 auto ILI = IM.lower_bound(KillingIntStart);
343 if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
344 // This existing interval is overlapped with the current store somewhere
345 // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
346 // intervals and adjusting our start and end.
347 KillingIntStart = std::min(KillingIntStart, ILI->second);
348 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
349 ILI = IM.erase(ILI);
350
351 // Continue erasing and adjusting our end in case other previous
352 // intervals are also overlapped with the current store.
353 //
354 // |--- dead 1 ---| |--- dead 2 ---|
355 // |------- killing---------|
356 //
357 while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
358 assert(ILI->second > KillingIntStart && "Unexpected interval");
359 KillingIntEnd = std::max(KillingIntEnd, ILI->first);
360 ILI = IM.erase(ILI);
361 }
362 }
363
364 IM[KillingIntEnd] = KillingIntStart;
365
366 ILI = IM.begin();
367 if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
368 LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
369 << DeadOff << ", " << int64_t(DeadOff + DeadSize)
370 << ") Composite KillingLoc [" << ILI->second << ", "
371 << ILI->first << ")\n");
372 ++NumCompletePartials;
373 return OW_Complete;
374 }
375 }
376
377 // Check for a dead store which writes to all the memory locations that
378 // the killing store writes to.
379 if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
380 int64_t(DeadOff + DeadSize) > KillingOff &&
381 uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
382 LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
383 << ", " << int64_t(DeadOff + DeadSize)
384 << ") by a killing store [" << KillingOff << ", "
385 << int64_t(KillingOff + KillingSize) << ")\n");
386 // TODO: Maybe come up with a better name?
387 return OW_PartialEarlierWithFullLater;
388 }
389
390 // Another interesting case is if the killing store overwrites the end of the
391 // dead store.
392 //
393 // |--dead--|
394 // |-- killing --|
395 //
396 // In this case we may want to trim the size of dead store to avoid
397 // generating stores to addresses which will definitely be overwritten killing
398 // store.
400 (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
401 int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
402 return OW_End;
403
404 // Finally, we also need to check if the killing store overwrites the
405 // beginning of the dead store.
406 //
407 // |--dead--|
408 // |-- killing --|
409 //
410 // In this case we may want to move the destination address and trim the size
411 // of dead store to avoid generating stores to addresses which will definitely
412 // be overwritten killing store.
414 (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
415 assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
416 "Expect to be handled as OW_Complete");
417 return OW_Begin;
418 }
419 // Otherwise, they don't completely overlap.
420 return OW_Unknown;
421}
422
423/// Returns true if the memory which is accessed by the second instruction is not
424/// modified between the first and the second instruction.
425/// Precondition: Second instruction must be dominated by the first
426/// instruction.
427static bool
430 DominatorTree *DT) {
431 // Do a backwards scan through the CFG from SecondI to FirstI. Look for
432 // instructions which can modify the memory location accessed by SecondI.
433 //
434 // While doing the walk keep track of the address to check. It might be
435 // different in different basic blocks due to PHI translation.
436 using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
438 // Keep track of the address we visited each block with. Bail out if we
439 // visit a block with different addresses.
441
442 BasicBlock::iterator FirstBBI(FirstI);
443 ++FirstBBI;
444 BasicBlock::iterator SecondBBI(SecondI);
445 BasicBlock *FirstBB = FirstI->getParent();
446 BasicBlock *SecondBB = SecondI->getParent();
447 MemoryLocation MemLoc;
448 if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
449 MemLoc = MemoryLocation::getForDest(MemSet);
450 else
451 MemLoc = MemoryLocation::get(SecondI);
452
453 auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
454
455 // Start checking the SecondBB.
456 WorkList.push_back(
457 std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
458 bool isFirstBlock = true;
459
460 // Check all blocks going backward until we reach the FirstBB.
461 while (!WorkList.empty()) {
462 BlockAddressPair Current = WorkList.pop_back_val();
463 BasicBlock *B = Current.first;
464 PHITransAddr &Addr = Current.second;
465 Value *Ptr = Addr.getAddr();
466
467 // Ignore instructions before FirstI if this is the FirstBB.
468 BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
469
471 if (isFirstBlock) {
472 // Ignore instructions after SecondI if this is the first visit of SecondBB.
473 assert(B == SecondBB && "first block is not the store block");
474 EI = SecondBBI;
475 isFirstBlock = false;
476 } else {
477 // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
478 // In this case we also have to look at instructions after SecondI.
479 EI = B->end();
480 }
481 for (; BI != EI; ++BI) {
482 Instruction *I = &*BI;
483 if (I->mayWriteToMemory() && I != SecondI)
484 if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
485 return false;
486 }
487 if (B != FirstBB) {
488 assert(B != &FirstBB->getParent()->getEntryBlock() &&
489 "Should not hit the entry block because SI must be dominated by LI");
490 for (BasicBlock *Pred : predecessors(B)) {
491 PHITransAddr PredAddr = Addr;
492 if (PredAddr.needsPHITranslationFromBlock(B)) {
493 if (!PredAddr.isPotentiallyPHITranslatable())
494 return false;
495 if (!PredAddr.translateValue(B, Pred, DT, false))
496 return false;
497 }
498 Value *TranslatedPtr = PredAddr.getAddr();
499 auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
500 if (!Inserted.second) {
501 // We already visited this block before. If it was with a different
502 // address - bail out!
503 if (TranslatedPtr != Inserted.first->second)
504 return false;
505 // ... otherwise just skip it.
506 continue;
507 }
508 WorkList.push_back(std::make_pair(Pred, PredAddr));
509 }
510 }
511 }
512 return true;
513}
514
515static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
516 uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
517 uint64_t NewSizeInBits, bool IsOverwriteEnd) {
518 const DataLayout &DL = Inst->getDataLayout();
519 uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
520 uint64_t DeadSliceOffsetInBits =
521 OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
522 auto SetDeadFragExpr = [](auto *Assign,
523 DIExpression::FragmentInfo DeadFragment) {
524 // createFragmentExpression expects an offset relative to the existing
525 // fragment offset if there is one.
526 uint64_t RelativeOffset = DeadFragment.OffsetInBits -
527 Assign->getExpression()
528 ->getFragmentInfo()
529 .value_or(DIExpression::FragmentInfo(0, 0))
530 .OffsetInBits;
532 Assign->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
533 Assign->setExpression(*NewExpr);
534 return;
535 }
536 // Failed to create a fragment expression for this so discard the value,
537 // making this a kill location.
539 DIExpression::get(Assign->getContext(), {}), DeadFragment.OffsetInBits,
540 DeadFragment.SizeInBits);
541 Assign->setExpression(Expr);
542 Assign->setKillLocation();
543 };
544
545 // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
546 // link to any instructions. Created in the loop below (once).
547 DIAssignID *LinkToNothing = nullptr;
548 LLVMContext &Ctx = Inst->getContext();
549 auto GetDeadLink = [&Ctx, &LinkToNothing]() {
550 if (!LinkToNothing)
551 LinkToNothing = DIAssignID::getDistinct(Ctx);
552 return LinkToNothing;
553 };
554
555 // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
556 // overlapping dbg.assign intrinsic.
557 for (DbgVariableRecord *Assign : at::getDVRAssignmentMarkers(Inst)) {
558 std::optional<DIExpression::FragmentInfo> NewFragment;
559 if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
560 DeadSliceSizeInBits, Assign,
561 NewFragment) ||
562 !NewFragment) {
563 // We couldn't calculate the intersecting fragment for some reason. Be
564 // cautious and unlink the whole assignment from the store.
565 Assign->setKillAddress();
566 Assign->setAssignId(GetDeadLink());
567 continue;
568 }
569 // No intersect.
570 if (NewFragment->SizeInBits == 0)
571 continue;
572
573 // Fragments overlap: insert a new dbg.assign for this dead part.
574 auto *NewAssign = static_cast<decltype(Assign)>(Assign->clone());
575 NewAssign->insertAfter(Assign->getIterator());
576 NewAssign->setAssignId(GetDeadLink());
577 if (NewFragment)
578 SetDeadFragExpr(NewAssign, *NewFragment);
579 NewAssign->setKillAddress();
580 }
581}
582
583/// Update the attributes given that a memory access is updated (the
584/// dereferenced pointer could be moved forward when shortening a
585/// mem intrinsic).
586static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo,
587 uint64_t PtrOffset) {
588 // Remember old attributes.
589 AttributeSet OldAttrs = Intrinsic->getParamAttributes(ArgNo);
590
591 // Find attributes that should be kept, and remove the rest.
592 AttributeMask AttrsToRemove;
593 for (auto &Attr : OldAttrs) {
594 if (Attr.hasKindAsEnum()) {
595 switch (Attr.getKindAsEnum()) {
596 default:
597 break;
598 case Attribute::Alignment:
599 // Only keep alignment if PtrOffset satisfy the alignment.
600 if (isAligned(Attr.getAlignment().valueOrOne(), PtrOffset))
601 continue;
602 break;
603 case Attribute::Dereferenceable:
604 case Attribute::DereferenceableOrNull:
605 // We could reduce the size of these attributes according to
606 // PtrOffset. But we simply drop these for now.
607 break;
608 case Attribute::NonNull:
609 case Attribute::NoUndef:
610 continue;
611 }
612 }
613 AttrsToRemove.addAttribute(Attr);
614 }
615
616 // Remove the attributes that should be dropped.
617 Intrinsic->removeParamAttrs(ArgNo, AttrsToRemove);
618}
619
620static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
621 uint64_t &DeadSize, int64_t KillingStart,
622 uint64_t KillingSize, bool IsOverwriteEnd) {
623 auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
624 Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
625
626 // We assume that memet/memcpy operates in chunks of the "largest" native
627 // type size and aligned on the same value. That means optimal start and size
628 // of memset/memcpy should be modulo of preferred alignment of that type. That
629 // is it there is no any sense in trying to reduce store size any further
630 // since any "extra" stores comes for free anyway.
631 // On the other hand, maximum alignment we can achieve is limited by alignment
632 // of initial store.
633
634 // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
635 // "largest" native type.
636 // Note: What is the proper way to get that value?
637 // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
638 // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
639
640 int64_t ToRemoveStart = 0;
641 uint64_t ToRemoveSize = 0;
642 // Compute start and size of the region to remove. Make sure 'PrefAlign' is
643 // maintained on the remaining store.
644 if (IsOverwriteEnd) {
645 // Calculate required adjustment for 'KillingStart' in order to keep
646 // remaining store size aligned on 'PerfAlign'.
647 uint64_t Off =
648 offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
649 ToRemoveStart = KillingStart + Off;
650 if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
651 return false;
652 ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
653 } else {
654 ToRemoveStart = DeadStart;
655 assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
656 "Not overlapping accesses?");
657 ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
658 // Calculate required adjustment for 'ToRemoveSize'in order to keep
659 // start of the remaining store aligned on 'PerfAlign'.
660 uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
661 if (Off != 0) {
662 if (ToRemoveSize <= (PrefAlign.value() - Off))
663 return false;
664 ToRemoveSize -= PrefAlign.value() - Off;
665 }
666 assert(isAligned(PrefAlign, ToRemoveSize) &&
667 "Should preserve selected alignment");
668 }
669
670 assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
671 assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
672
673 uint64_t NewSize = DeadSize - ToRemoveSize;
674 if (DeadIntrinsic->isAtomic()) {
675 // When shortening an atomic memory intrinsic, the newly shortened
676 // length must remain an integer multiple of the element size.
677 const uint32_t ElementSize = DeadIntrinsic->getElementSizeInBytes();
678 if (0 != NewSize % ElementSize)
679 return false;
680 }
681
682 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
683 << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
684 << "\n KILLER [" << ToRemoveStart << ", "
685 << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
686
687 DeadIntrinsic->setLength(NewSize);
688 DeadIntrinsic->setDestAlignment(PrefAlign);
689
690 Value *OrigDest = DeadIntrinsic->getRawDest();
691 if (!IsOverwriteEnd) {
692 Value *Indices[1] = {
693 ConstantInt::get(DeadIntrinsic->getLength()->getType(), ToRemoveSize)};
695 Type::getInt8Ty(DeadIntrinsic->getContext()), OrigDest, Indices, "",
696 DeadI->getIterator());
697 NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
698 DeadIntrinsic->setDest(NewDestGEP);
699 adjustArgAttributes(DeadIntrinsic, 0, ToRemoveSize);
700 }
701
702 // Update attached dbg.assign intrinsics. Assume 8-bit byte.
703 shortenAssignment(DeadI, OrigDest, DeadStart * 8, DeadSize * 8, NewSize * 8,
704 IsOverwriteEnd);
705
706 // Finally update start and size of dead access.
707 if (!IsOverwriteEnd)
708 DeadStart += ToRemoveSize;
709 DeadSize = NewSize;
710
711 return true;
712}
713
715 int64_t &DeadStart, uint64_t &DeadSize) {
716 if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
717 return false;
718
719 OverlapIntervalsTy::iterator OII = --IntervalMap.end();
720 int64_t KillingStart = OII->second;
721 uint64_t KillingSize = OII->first - KillingStart;
722
723 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
724
725 if (KillingStart > DeadStart &&
726 // Note: "KillingStart - KillingStart" is known to be positive due to
727 // preceding check.
728 (uint64_t)(KillingStart - DeadStart) < DeadSize &&
729 // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
730 // be non negative due to preceding checks.
731 KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
732 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
733 true)) {
734 IntervalMap.erase(OII);
735 return true;
736 }
737 }
738 return false;
739}
740
743 int64_t &DeadStart, uint64_t &DeadSize) {
745 return false;
746
747 OverlapIntervalsTy::iterator OII = IntervalMap.begin();
748 int64_t KillingStart = OII->second;
749 uint64_t KillingSize = OII->first - KillingStart;
750
751 assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
752
753 if (KillingStart <= DeadStart &&
754 // Note: "DeadStart - KillingStart" is known to be non negative due to
755 // preceding check.
756 KillingSize > (uint64_t)(DeadStart - KillingStart)) {
757 // Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
758 // be positive due to preceding checks.
759 assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
760 "Should have been handled as OW_Complete");
761 if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
762 false)) {
763 IntervalMap.erase(OII);
764 return true;
765 }
766 }
767 return false;
768}
769
770static Constant *
772 int64_t KillingOffset, int64_t DeadOffset,
774 DominatorTree *DT) {
775 assert(KillingI);
776 assert(DeadI);
777
778 // If the store we find is:
779 // a) partially overwritten by the store to 'Loc'
780 // b) the killing store is fully contained in the dead one and
781 // c) they both have a constant value
782 // d) none of the two stores need padding
783 // Merge the two stores, replacing the dead store's value with a
784 // merge of both values.
785 //
786 // TODO: Deal with other constant types (vectors, etc), and probably
787 // some mem intrinsics (if needed)
788 if (!isa<ConstantInt>(DeadI->getValueOperand()) ||
789 !DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) ||
790 !isa<ConstantInt>(KillingI->getValueOperand()) ||
791 !DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) ||
792 !memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT))
793 return nullptr;
794
795 // The merge erases KillingI and writes its bytes via DeadI. For that to be
796 // safe:
797 // - KillingI must be deletable (not volatile, ordering at most unordered),
798 // - DeadI must be safe to rewrite, and
799 // - their orderings must match, so the bytes originally written by
800 // KillingI keep the same atomicity after they are folded into DeadI.
801 // This allows merging two simple stores or two unordered-atomic stores with
802 // matching ordering, while leaving volatile and ordered-atomic stores in
803 // place.
804 if (!KillingI->isUnordered() || !DeadI->isUnordered() ||
805 KillingI->getOrdering() != DeadI->getOrdering())
806 return nullptr;
807
808 APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
809 APInt KillingValue =
810 cast<ConstantInt>(KillingI->getValueOperand())->getValue();
811 unsigned KillingBits = KillingValue.getBitWidth();
812 assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
813 KillingValue = KillingValue.zext(DeadValue.getBitWidth());
814
815 // Offset of the smaller store inside the larger store
816 unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
817 unsigned LShiftAmount =
818 DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
819 : BitOffsetDiff;
820 APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
821 LShiftAmount + KillingBits);
822 // Clear the bits we'll be replacing, then OR with the smaller
823 // store, shifted appropriately.
824 APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
825 LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
826 << "\n Killing: " << *KillingI
827 << "\n Merged Value: " << Merged << '\n');
828 return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
829}
830
831// Returns true if \p I is an intrinsic that does not read or write memory.
834 switch (II->getIntrinsicID()) {
835 case Intrinsic::lifetime_start:
836 case Intrinsic::lifetime_end:
837 case Intrinsic::invariant_end:
838 case Intrinsic::launder_invariant_group:
839 case Intrinsic::assume:
840 return true;
841 case Intrinsic::dbg_declare:
842 case Intrinsic::dbg_label:
843 case Intrinsic::dbg_value:
844 llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
845 default:
846 return false;
847 }
848 }
849 return false;
850}
851
852// Check if we can ignore \p D for DSE.
853static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
854 Instruction *DI = D->getMemoryInst();
855 // Calls that only access inaccessible memory cannot read or write any memory
856 // locations we consider for elimination.
857 if (auto *CB = dyn_cast<CallBase>(DI))
858 if (CB->onlyAccessesInaccessibleMemory())
859 return true;
860
861 // We can eliminate stores to locations not visible to the caller across
862 // throwing instructions.
863 if (DI->mayThrow() && !DefVisibleToCaller)
864 return true;
865
866 // We can remove the dead stores, irrespective of the fence and its ordering
867 // (release/acquire/seq_cst). Fences only constraints the ordering of
868 // already visible stores, it does not make a store visible to other
869 // threads. So, skipping over a fence does not change a store from being
870 // dead.
871 if (isa<FenceInst>(DI))
872 return true;
873
874 // Skip intrinsics that do not really read or modify memory.
875 if (isNoopIntrinsic(DI))
876 return true;
877
878 return false;
879}
880
881namespace {
882
883// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
884// defined by `MemDef`.
885struct MemoryLocationWrapper {
886 MemoryLocationWrapper(MemoryLocation MemLoc, MemoryDef *MemDef,
887 bool DefByInitializesAttr)
888 : MemLoc(MemLoc), MemDef(MemDef),
889 DefByInitializesAttr(DefByInitializesAttr) {
890 assert(MemLoc.Ptr && "MemLoc should be not null");
891 UnderlyingObject = getUnderlyingObject(MemLoc.Ptr);
892 DefInst = MemDef->getMemoryInst();
893 }
894
895 MemoryLocation MemLoc;
896 const Value *UnderlyingObject;
897 MemoryDef *MemDef;
898 Instruction *DefInst;
899 bool DefByInitializesAttr = false;
900};
901
902// A memory def wrapper that represents a MemoryDef and the MemoryLocation(s)
903// defined by this MemoryDef.
904struct MemoryDefWrapper {
905 MemoryDefWrapper(MemoryDef *MemDef,
906 ArrayRef<std::pair<MemoryLocation, bool>> MemLocations) {
907 DefInst = MemDef->getMemoryInst();
908 for (auto &[MemLoc, DefByInitializesAttr] : MemLocations)
909 DefinedLocations.push_back(
910 MemoryLocationWrapper(MemLoc, MemDef, DefByInitializesAttr));
911 }
912 Instruction *DefInst;
914};
915
916struct ArgumentInitInfo {
917 unsigned Idx;
918 bool IsDeadOrInvisibleOnUnwind;
919 ConstantRangeList Inits;
920};
921} // namespace
922
925 return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
926}
927
928// Return the intersected range list of the initializes attributes of "Args".
929// "Args" are call arguments that alias to each other.
930// If any argument in "Args" doesn't have dead_on_unwind attr and
931// "CallHasNoUnwindAttr" is false, return empty.
934 bool CallHasNoUnwindAttr) {
935 if (Args.empty())
936 return {};
937
938 // To address unwind, the function should have nounwind attribute or the
939 // arguments have dead or invisible on unwind. Otherwise, return empty.
940 for (const auto &Arg : Args) {
941 if (!CallHasNoUnwindAttr && !Arg.IsDeadOrInvisibleOnUnwind)
942 return {};
943 if (Arg.Inits.empty())
944 return {};
945 }
946
947 ConstantRangeList IntersectedIntervals = Args.front().Inits;
948 for (auto &Arg : Args.drop_front())
949 IntersectedIntervals = IntersectedIntervals.intersectWith(Arg.Inits);
950
951 return IntersectedIntervals;
952}
953
954namespace {
955
956struct DSEState {
957 Function &F;
958 AliasAnalysis &AA;
959 EarliestEscapeAnalysis EA;
960
961 /// The single BatchAA instance that is used to cache AA queries. It will
962 /// not be invalidated over the whole run. This is safe, because:
963 /// 1. Only memory writes are removed, so the alias cache for memory
964 /// locations remains valid.
965 /// 2. No new instructions are added (only instructions removed), so cached
966 /// information for a deleted value cannot be accessed by a re-used new
967 /// value pointer.
968 BatchAAResults BatchAA;
969
970 MemorySSA &MSSA;
971 DominatorTree &DT;
972 PostDominatorTree &PDT;
973 const TargetLibraryInfo &TLI;
974 const DataLayout &DL;
975 const CycleInfo &CI;
976
977 // All MemoryDefs that potentially could kill other MemDefs.
979 // Any that should be skipped as they are already deleted
980 SmallPtrSet<MemoryAccess *, 4> SkipStores;
981 // Keep track whether a given object is captured before return or not.
982 DenseMap<const Value *, bool> CapturedBeforeReturn;
983 // Keep track of all of the objects that are invisible to the caller after
984 // the function returns.
985 DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
986 DenseMap<const Value *, uint64_t> InvisibleToCallerAfterRetBounded;
987 // Keep track of blocks with throwing instructions not modeled in MemorySSA.
988 SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
989 // Post-order numbers for each basic block. Used to figure out if memory
990 // accesses are executed before another access.
991 DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
992
993 /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
994 /// basic block.
995 MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
996 // Check if there are root nodes that are terminated by UnreachableInst.
997 // Those roots pessimize post-dominance queries. If there are such roots,
998 // fall back to CFG scan starting from all non-unreachable roots.
999 bool AnyUnreachableExit;
1000
1001 // Whether or not we should iterate on removing dead stores at the end of the
1002 // function due to removing a store causing a previously captured pointer to
1003 // no longer be captured.
1004 bool ShouldIterateEndOfFunctionDSE;
1005
1006 /// Dead instructions to be removed at the end of DSE.
1008
1009 // Class contains self-reference, make sure it's not copied/moved.
1010 DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
1011 PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
1012 const CycleInfo &CI);
1013 DSEState(const DSEState &) = delete;
1014 DSEState &operator=(const DSEState &) = delete;
1015
1016 LocationSize strengthenLocationSize(const Instruction *I,
1017 LocationSize Size) const;
1018
1019 /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
1020 /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
1021 /// location (by \p DeadI instruction).
1022 /// Return OW_MaybePartial if \p KillingI does not completely overwrite
1023 /// \p DeadI, but they both write to the same underlying object. In that
1024 /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
1025 /// \p DeadI. Returns 'OR_None' if \p KillingI is known to not overwrite the
1026 /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
1027 OverwriteResult isOverwrite(const Instruction *KillingI,
1028 const Instruction *DeadI,
1029 const MemoryLocation &KillingLoc,
1030 const MemoryLocation &DeadLoc,
1031 int64_t &KillingOff, int64_t &DeadOff);
1032
1033 bool isInvisibleToCallerAfterRet(const Value *V, const Value *Ptr,
1034 const LocationSize StoreSize);
1035
1036 bool isInvisibleToCallerOnUnwind(const Value *V);
1037
1038 std::optional<MemoryLocation> getLocForWrite(Instruction *I) const;
1039
1040 // Returns a list of <MemoryLocation, bool> pairs written by I.
1041 // The bool means whether the write is from Initializes attr.
1043 getLocForInst(Instruction *I, bool ConsiderInitializesAttr);
1044
1045 /// Assuming this instruction has a dead analyzable write, can we delete
1046 /// this instruction?
1047 bool isRemovable(Instruction *I);
1048
1049 /// Returns true if \p UseInst completely overwrites \p DefLoc
1050 /// (stored by \p DefInst).
1051 bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1052 Instruction *UseInst);
1053
1054 /// Returns true if \p Def is not read before returning from the function.
1055 bool isWriteAtEndOfFunction(MemoryDef *Def, const MemoryLocation &DefLoc);
1056
1057 /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1058 /// pair with the MemoryLocation terminated by \p I and a boolean flag
1059 /// indicating whether \p I is a free-like call.
1060 std::optional<std::pair<MemoryLocation, bool>>
1061 getLocForTerminator(Instruction *I) const;
1062
1063 /// Returns true if \p I is a memory terminator instruction like
1064 /// llvm.lifetime.end or free.
1065 bool isMemTerminatorInst(Instruction *I) const;
1066
1067 /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1068 /// instruction \p AccessI.
1069 bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1070 Instruction *MaybeTerm);
1071
1072 // Returns true if \p Use may read from \p DefLoc.
1073 bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst);
1074
1075 /// Returns true if a dependency between \p Current and \p KillingDef is
1076 /// guaranteed to be loop invariant for the loops that they are in. Either
1077 /// because they are known to be in the same block, in the same loop level or
1078 /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1079 /// during execution of the containing function.
1080 bool isGuaranteedLoopIndependent(const Instruction *Current,
1081 const Instruction *KillingDef,
1082 const MemoryLocation &CurrentLoc);
1083
1084 /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1085 /// loop. In particular, this guarantees that it only references a single
1086 /// MemoryLocation during execution of the containing function.
1087 bool isGuaranteedLoopInvariant(const Value *Ptr);
1088
1089 // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1090 // with no read access between them or on any other path to a function exit
1091 // block if \p KillingLoc is not accessible after the function returns. If
1092 // there is no such MemoryDef, return std::nullopt. The returned value may not
1093 // (completely) overwrite \p KillingLoc. Currently we bail out when we
1094 // encounter an aliasing MemoryUse (read).
1095 std::optional<MemoryAccess *>
1096 getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
1097 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1098 unsigned &ScanLimit, unsigned &WalkerStepLimit,
1099 bool IsMemTerm, unsigned &PartialLimit,
1100 bool IsInitializesAttrMemLoc);
1101
1102 /// Delete dead memory defs and recursively add their operands to ToRemove if
1103 /// they became dead.
1104 void
1105 deleteDeadInstruction(Instruction *SI,
1106 SmallPtrSetImpl<MemoryAccess *> *Deleted = nullptr);
1107
1108 // Check for any extra throws between \p KillingI and \p DeadI that block
1109 // DSE. This only checks extra maythrows (those that aren't MemoryDef's).
1110 // MemoryDef that may throw are handled during the walk from one def to the
1111 // next.
1112 bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
1113 const Value *KillingUndObj);
1114
1115 // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1116 // instructions act as barriers:
1117 // * A memory instruction that may throw and \p KillingI accesses a non-stack
1118 // object.
1119 // * Atomic stores stronger that monotonic.
1120 bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI);
1121
1122 /// Eliminate writes to objects that are not visible in the caller and are not
1123 /// accessed before returning from the function.
1124 bool eliminateDeadWritesAtEndOfFunction();
1125
1126 /// If we have a zero initializing memset following a call to malloc,
1127 /// try folding it into a call to calloc.
1128 bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO);
1129
1130 /// \returns true if \p Def is a no-op store, either because it
1131 /// directly stores back a loaded value or stores zero to a calloced object.
1132 bool storeIsNoop(MemoryDef *Def, const Value *DefUO);
1133
1134 bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL);
1135
1136 /// Eliminates writes to locations where the value that is being written
1137 /// is already stored at the same location.
1138 bool eliminateRedundantStoresOfExistingValues();
1139
1140 /// If there is a dominating condition that implies the value being stored in
1141 /// a pointer, and such a condition appears in a node that dominates the
1142 /// store, then the store may be redundant if no write occurs in between.
1143 bool eliminateRedundantStoresViaDominatingConditions();
1144
1145 // Return the locations written by the initializes attribute.
1146 // Note that this function considers:
1147 // 1. Unwind edge: use "initializes" attribute only if the callee has
1148 // "nounwind" attribute, or the argument has "dead_on_unwind" attribute,
1149 // or the argument is invisible to caller on unwind. That is, we don't
1150 // perform incorrect DSE on unwind edges in the current function.
1151 // 2. Argument alias: for aliasing arguments, the "initializes" attribute is
1152 // the intersected range list of their "initializes" attributes.
1153 SmallVector<MemoryLocation, 1> getInitializesArgMemLoc(const Instruction *I);
1154
1155 // Try to eliminate dead defs that access `KillingLocWrapper.MemLoc` and are
1156 // killed by `KillingLocWrapper.MemDef`. Return whether
1157 // any changes were made, and whether `KillingLocWrapper.DefInst` was deleted.
1158 std::pair<bool, bool>
1159 eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper);
1160
1161 // Try to eliminate dead defs killed by `KillingDefWrapper` and return the
1162 // change state: whether make any change.
1163 bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
1164};
1165
1166} // end anonymous namespace
1167
1168static void pushMemUses(MemoryAccess *Acc,
1171 for (Use &U : Acc->uses()) {
1172 auto *MA = cast<MemoryAccess>(U.getUser());
1173 if (Visited.insert(MA).second)
1174 WorkList.push_back(MA);
1175 }
1176}
1177
1178// Return true if "Arg" is function local and isn't captured before "CB".
1179static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
1181 const Value *UnderlyingObj = getUnderlyingObject(Arg);
1182 return isIdentifiedFunctionLocal(UnderlyingObj) &&
1183 capturesNothing(EA.getCapturesBefore(UnderlyingObj, CB, /*OrAt=*/true,
1184 /*ReturnCaptures=*/false));
1185}
1186
1187DSEState::DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
1189 const TargetLibraryInfo &TLI, const CycleInfo &CI)
1190 : F(F), AA(AA), EA(DT, nullptr, &CI), BatchAA(AA, &EA), MSSA(MSSA), DT(DT),
1191 PDT(PDT), TLI(TLI), DL(F.getDataLayout()), CI(CI) {
1192 // Collect blocks with throwing instructions not modeled in MemorySSA and
1193 // alloc-like objects.
1194 unsigned PO = 0;
1195 for (BasicBlock *BB : post_order(&F)) {
1196 PostOrderNumbers[BB] = PO++;
1197 for (Instruction &I : *BB) {
1198 MemoryAccess *MA = MSSA.getMemoryAccess(&I);
1199 if (I.mayThrow() && !MA)
1200 ThrowingBlocks.insert(I.getParent());
1201
1202 auto *MD = dyn_cast_or_null<MemoryDef>(MA);
1203 if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
1204 (getLocForWrite(&I) || isMemTerminatorInst(&I) ||
1206 MemDefs.push_back(MD);
1207 }
1208 }
1209
1210 // Treat byval, inalloca or dead on return arguments the same as Allocas,
1211 // stores to them are dead at the end of the function.
1212 for (Argument &AI : F.args()) {
1213 if (AI.hasPassPointeeByValueCopyAttr()) {
1214 InvisibleToCallerAfterRet.insert({&AI, true});
1215 continue;
1216 }
1217
1218 if (!AI.getType()->isPointerTy())
1219 continue;
1220
1221 const DeadOnReturnInfo &Info = AI.getDeadOnReturnInfo();
1222 if (Info.coversAllReachableMemory())
1223 InvisibleToCallerAfterRet.insert({&AI, true});
1224 else if (uint64_t DeadBytes = Info.getNumberOfDeadBytes())
1225 InvisibleToCallerAfterRetBounded.insert({&AI, DeadBytes});
1226 }
1227
1228 AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
1229 return isa<UnreachableInst>(E->getTerminator());
1230 });
1231}
1232
1233LocationSize DSEState::strengthenLocationSize(const Instruction *I,
1234 LocationSize Size) const {
1235 if (auto *CB = dyn_cast<CallBase>(I)) {
1236 LibFunc F;
1237 if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
1238 (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
1239 // Use the precise location size specified by the 3rd argument
1240 // for determining KillingI overwrites DeadLoc if it is a memset_chk
1241 // instruction. memset_chk will write either the amount specified as 3rd
1242 // argument or the function will immediately abort and exit the program.
1243 // NOTE: AA may determine NoAlias if it can prove that the access size
1244 // is larger than the allocation size due to that being UB. To avoid
1245 // returning potentially invalid NoAlias results by AA, limit the use of
1246 // the precise location size to isOverwrite.
1247 if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
1248 return LocationSize::precise(Len->getZExtValue());
1249 }
1250 }
1251 return Size;
1252}
1253
1254OverwriteResult DSEState::isOverwrite(const Instruction *KillingI,
1255 const Instruction *DeadI,
1256 const MemoryLocation &KillingLoc,
1257 const MemoryLocation &DeadLoc,
1258 int64_t &KillingOff, int64_t &DeadOff) {
1259 // AliasAnalysis does not always account for loops. Limit overwrite checks
1260 // to dependencies for which we can guarantee they are independent of any
1261 // loops they are in.
1262 if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
1263 return OW_Unknown;
1264
1265 LocationSize KillingLocSize =
1266 strengthenLocationSize(KillingI, KillingLoc.Size);
1267 const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
1268 const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
1269 const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
1270 const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
1271
1272 // Check whether the killing store overwrites the whole object, in which
1273 // case the size/offset of the dead store does not matter.
1274 if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
1275 isIdentifiedObject(KillingUndObj)) {
1276 std::optional<TypeSize> KillingUndObjSize =
1277 getPointerSize(KillingUndObj, DL, TLI, &F);
1278 if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
1279 return OW_Complete;
1280 }
1281
1282 // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
1283 // get imprecise values here, though (except for unknown sizes).
1284 if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
1285 // In case no constant size is known, try to an IR values for the number
1286 // of bytes written and check if they match.
1287 const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
1288 const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
1289 if (KillingMemI && DeadMemI) {
1290 const Value *KillingV = KillingMemI->getLength();
1291 const Value *DeadV = DeadMemI->getLength();
1292 if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
1293 return OW_Complete;
1294 }
1295
1296 // Masked stores have imprecise locations, but we can reason about them
1297 // to some extent.
1298 return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
1299 }
1300
1301 const TypeSize KillingSize = KillingLocSize.getValue();
1302 const TypeSize DeadSize = DeadLoc.Size.getValue();
1303 // Bail on doing Size comparison which depends on AA for now
1304 // TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
1305 const bool AnyScalable = DeadSize.isScalable() || KillingLocSize.isScalable();
1306
1307 if (AnyScalable)
1308 return OW_Unknown;
1309 // Query the alias information
1310 AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
1311
1312 // If the start pointers are the same, we just have to compare sizes to see if
1313 // the killing store was larger than the dead store.
1314 if (AAR == AliasResult::MustAlias) {
1315 // Make sure that the KillingSize size is >= the DeadSize size.
1316 if (KillingSize >= DeadSize)
1317 return OW_Complete;
1318 }
1319
1320 // If we hit a partial alias we may have a full overwrite
1321 if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
1322 int32_t Off = AAR.getOffset();
1323 if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
1324 return OW_Complete;
1325 }
1326
1327 // If we can't resolve the same pointers to the same object, then we can't
1328 // analyze them at all.
1329 if (DeadUndObj != KillingUndObj) {
1330 // Non aliasing stores to different objects don't overlap. Note that
1331 // if the killing store is known to overwrite whole object (out of
1332 // bounds access overwrites whole object as well) then it is assumed to
1333 // completely overwrite any store to the same object even if they don't
1334 // actually alias (see next check).
1335 if (AAR == AliasResult::NoAlias)
1336 return OW_None;
1337 return OW_Unknown;
1338 }
1339
1340 // Okay, we have stores to two completely different pointers. Try to
1341 // decompose the pointer into a "base + constant_offset" form. If the base
1342 // pointers are equal, then we can reason about the two stores.
1343 DeadOff = 0;
1344 KillingOff = 0;
1345 const Value *DeadBasePtr =
1346 GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
1347 const Value *KillingBasePtr =
1348 GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
1349
1350 // If the base pointers still differ, we have two completely different
1351 // stores.
1352 if (DeadBasePtr != KillingBasePtr)
1353 return OW_Unknown;
1354
1355 // The killing access completely overlaps the dead store if and only if
1356 // both start and end of the dead one is "inside" the killing one:
1357 // |<->|--dead--|<->|
1358 // |-----killing------|
1359 // Accesses may overlap if and only if start of one of them is "inside"
1360 // another one:
1361 // |<->|--dead--|<-------->|
1362 // |-------killing--------|
1363 // OR
1364 // |-------dead-------|
1365 // |<->|---killing---|<----->|
1366 //
1367 // We have to be careful here as *Off is signed while *.Size is unsigned.
1368
1369 // Check if the dead access starts "not before" the killing one.
1370 if (DeadOff >= KillingOff) {
1371 // If the dead access ends "not after" the killing access then the
1372 // dead one is completely overwritten by the killing one.
1373 if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
1374 return OW_Complete;
1375 // If start of the dead access is "before" end of the killing access
1376 // then accesses overlap.
1377 else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
1378 return OW_MaybePartial;
1379 }
1380 // If start of the killing access is "before" end of the dead access then
1381 // accesses overlap.
1382 else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
1383 return OW_MaybePartial;
1384 }
1385
1386 // Can reach here only if accesses are known not to overlap.
1387 return OW_None;
1388}
1389
1390bool DSEState::isInvisibleToCallerAfterRet(const Value *V, const Value *Ptr,
1391 const LocationSize StoreSize) {
1392 if (isa<AllocaInst>(V))
1393 return true;
1394
1395 auto IBounded = InvisibleToCallerAfterRetBounded.find(V);
1396 if (IBounded != InvisibleToCallerAfterRetBounded.end()) {
1397 int64_t ValueOffset;
1398 [[maybe_unused]] const Value *BaseValue =
1399 GetPointerBaseWithConstantOffset(Ptr, ValueOffset, DL);
1400 // If we are not able to find a constant offset from the UO, we have to
1401 // pessimistically assume that the store writes to memory out of the
1402 // dead_on_return bounds.
1403 if (BaseValue != V)
1404 return false;
1405 // This store is only invisible after return if we are in bounds of the
1406 // range marked dead.
1407 if (StoreSize.hasValue() &&
1408 ValueOffset + StoreSize.getValue() <= IBounded->second &&
1409 ValueOffset >= 0)
1410 return true;
1411 }
1412 auto I = InvisibleToCallerAfterRet.insert({V, false});
1413 if (I.second && isInvisibleToCallerOnUnwind(V) && isNoAliasCall(V))
1414 I.first->second = capturesNothing(
1415 PointerMayBeCaptured(V, CaptureComponents::Provenance).WithRet);
1416 return I.first->second;
1417}
1418
1419bool DSEState::isInvisibleToCallerOnUnwind(const Value *V) {
1420 bool RequiresNoCaptureBeforeUnwind;
1421 if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
1422 return false;
1423 if (!RequiresNoCaptureBeforeUnwind)
1424 return true;
1425
1426 auto I = CapturedBeforeReturn.insert({V, true});
1427 if (I.second)
1428 // NOTE: This could be made more precise by PointerMayBeCapturedBefore
1429 // with the killing MemoryDef. But we refrain from doing so for now to
1430 // limit compile-time and this does not cause any changes to the number
1431 // of stores removed on a large test set in practice.
1432 I.first->second = capturesAnything(
1433 PointerMayBeCaptured(V, CaptureComponents::Provenance).WithoutRet);
1434 return !I.first->second;
1435}
1436
1437std::optional<MemoryLocation> DSEState::getLocForWrite(Instruction *I) const {
1438 if (!I->mayWriteToMemory())
1439 return std::nullopt;
1440
1441 if (auto *CB = dyn_cast<CallBase>(I))
1442 return MemoryLocation::getForDest(CB, TLI);
1443
1445}
1446
1448DSEState::getLocForInst(Instruction *I, bool ConsiderInitializesAttr) {
1450 if (isMemTerminatorInst(I)) {
1451 if (auto Loc = getLocForTerminator(I))
1452 Locations.push_back(std::make_pair(Loc->first, false));
1453 return Locations;
1454 }
1455
1456 if (auto Loc = getLocForWrite(I))
1457 Locations.push_back(std::make_pair(*Loc, false));
1458
1459 if (ConsiderInitializesAttr) {
1460 for (auto &MemLoc : getInitializesArgMemLoc(I)) {
1461 Locations.push_back(std::make_pair(MemLoc, true));
1462 }
1463 }
1464 return Locations;
1465}
1466
1467bool DSEState::isRemovable(Instruction *I) {
1468 assert(getLocForWrite(I) && "Must have analyzable write");
1469
1470 // Don't remove volatile/atomic stores.
1471 if (StoreInst *SI = dyn_cast<StoreInst>(I))
1472 return SI->isUnordered();
1473
1474 if (auto *CB = dyn_cast<CallBase>(I)) {
1475 // Don't remove volatile memory intrinsics.
1476 if (auto *MI = dyn_cast<MemIntrinsic>(CB))
1477 return !MI->isVolatile();
1478
1479 // Never remove dead lifetime intrinsics, e.g. because they are followed
1480 // by a free.
1481 if (CB->isLifetimeStartOrEnd())
1482 return false;
1483
1484 return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1485 !CB->isTerminator();
1486 }
1487
1488 return false;
1489}
1490
1491bool DSEState::isCompleteOverwrite(const MemoryLocation &DefLoc,
1492 Instruction *DefInst, Instruction *UseInst) {
1493 // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1494 // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1495 // MemoryDef.
1496 if (!UseInst->mayWriteToMemory())
1497 return false;
1498
1499 if (auto *CB = dyn_cast<CallBase>(UseInst))
1500 if (CB->onlyAccessesInaccessibleMemory())
1501 return false;
1502
1503 int64_t InstWriteOffset, DepWriteOffset;
1504 if (auto CC = getLocForWrite(UseInst))
1505 return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
1506 DepWriteOffset) == OW_Complete;
1507 return false;
1508}
1509
1510bool DSEState::isWriteAtEndOfFunction(MemoryDef *Def,
1511 const MemoryLocation &DefLoc) {
1512 LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1513 << *Def->getMemoryInst()
1514 << ") is at the end the function \n");
1516 SmallPtrSet<MemoryAccess *, 8> Visited;
1517
1518 pushMemUses(Def, WorkList, Visited);
1519 for (unsigned I = 0; I < WorkList.size(); I++) {
1520 if (WorkList.size() >= MemorySSAScanLimit) {
1521 LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1522 return false;
1523 }
1524
1525 MemoryAccess *UseAccess = WorkList[I];
1526 if (isa<MemoryPhi>(UseAccess)) {
1527 // AliasAnalysis does not account for loops. Limit elimination to
1528 // candidates for which we can guarantee they always store to the same
1529 // memory location.
1530 if (!isGuaranteedLoopInvariant(DefLoc.Ptr))
1531 return false;
1532
1533 pushMemUses(cast<MemoryPhi>(UseAccess), WorkList, Visited);
1534 continue;
1535 }
1536 // TODO: Checking for aliasing is expensive. Consider reducing the amount
1537 // of times this is called and/or caching it.
1538 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1539 if (isReadClobber(DefLoc, UseInst)) {
1540 LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1541 return false;
1542 }
1543
1544 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
1545 pushMemUses(UseDef, WorkList, Visited);
1546 }
1547 return true;
1548}
1549
1550std::optional<std::pair<MemoryLocation, bool>>
1551DSEState::getLocForTerminator(Instruction *I) const {
1552 if (auto *CB = dyn_cast<CallBase>(I)) {
1553 if (CB->getIntrinsicID() == Intrinsic::lifetime_end)
1554 return {
1555 std::make_pair(MemoryLocation::getForArgument(CB, 0, &TLI), false)};
1556 if (Value *FreedOp = getFreedOperand(CB, &TLI))
1557 return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
1558 }
1559
1560 return std::nullopt;
1561}
1562
1563bool DSEState::isMemTerminatorInst(Instruction *I) const {
1564 auto *CB = dyn_cast<CallBase>(I);
1565 return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
1566 getFreedOperand(CB, &TLI) != nullptr);
1567}
1568
1569bool DSEState::isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1570 Instruction *MaybeTerm) {
1571 std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1572 getLocForTerminator(MaybeTerm);
1573
1574 if (!MaybeTermLoc)
1575 return false;
1576
1577 // If the terminator is a free-like call, all accesses to the underlying
1578 // object can be considered terminated.
1579 if (getUnderlyingObject(Loc.Ptr) !=
1580 getUnderlyingObject(MaybeTermLoc->first.Ptr))
1581 return false;
1582
1583 auto TermLoc = MaybeTermLoc->first;
1584 if (MaybeTermLoc->second) {
1585 const Value *LocUO = getUnderlyingObject(Loc.Ptr);
1586 return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
1587 }
1588 int64_t InstWriteOffset = 0;
1589 int64_t DepWriteOffset = 0;
1590 return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
1591 DepWriteOffset) == OW_Complete;
1592}
1593
1594bool DSEState::isReadClobber(const MemoryLocation &DefLoc,
1595 Instruction *UseInst) {
1596 if (isNoopIntrinsic(UseInst))
1597 return false;
1598
1599 // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1600 // treated as read clobber.
1601 if (auto SI = dyn_cast<StoreInst>(UseInst))
1602 return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
1603
1604 if (!UseInst->mayReadFromMemory())
1605 return false;
1606
1607 if (auto *CB = dyn_cast<CallBase>(UseInst))
1608 if (CB->onlyAccessesInaccessibleMemory())
1609 return false;
1610
1611 return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
1612}
1613
1614bool DSEState::isGuaranteedLoopIndependent(const Instruction *Current,
1615 const Instruction *KillingDef,
1616 const MemoryLocation &CurrentLoc) {
1617 // If the dependency is within the same block or loop level (being careful
1618 // of irreducible loops), we know that AA will return a valid result for the
1619 // memory dependency. (Both at the function level, outside of any loop,
1620 // would also be valid but we currently disable that to limit compile time).
1621 if (Current->getParent() == KillingDef->getParent())
1622 return true;
1623 const Cycle *CurrentC = CI.getCycle(Current->getParent());
1624 if (CurrentC && CurrentC == CI.getCycle(KillingDef->getParent()))
1625 return true;
1626 // Otherwise check the memory location is invariant to any loops.
1627 return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
1628}
1629
1630bool DSEState::isGuaranteedLoopInvariant(const Value *Ptr) {
1631 Ptr = Ptr->stripPointerCasts();
1632 if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
1633 if (GEP->hasAllConstantIndices())
1634 Ptr = GEP->getPointerOperand()->stripPointerCasts();
1635
1636 if (auto *I = dyn_cast<Instruction>(Ptr)) {
1637 return I->getParent()->isEntryBlock() || !CI.getCycle(I->getParent());
1638 }
1639 return true;
1640}
1641
1642std::optional<MemoryAccess *> DSEState::getDomMemoryDef(
1643 MemoryDef *KillingDef, MemoryAccess *StartAccess,
1644 const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1645 unsigned &ScanLimit, unsigned &WalkerStepLimit, bool IsMemTerm,
1646 unsigned &PartialLimit, bool IsInitializesAttrMemLoc) {
1647 if (ScanLimit == 0 || WalkerStepLimit == 0) {
1648 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1649 return std::nullopt;
1650 }
1651
1652 MemoryAccess *Current = StartAccess;
1653 Instruction *KillingI = KillingDef->getMemoryInst();
1654 LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1655
1656 // Only optimize defining access of KillingDef when directly starting at its
1657 // defining access. The defining access also must only access KillingLoc. At
1658 // the moment we only support instructions with a single write location, so
1659 // it should be sufficient to disable optimizations for instructions that
1660 // also read from memory.
1661 bool CanOptimize = OptimizeMemorySSA &&
1662 KillingDef->getDefiningAccess() == StartAccess &&
1663 !KillingI->mayReadFromMemory();
1664
1665 // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1666 std::optional<MemoryLocation> CurrentLoc;
1667 for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
1668 LLVM_DEBUG({
1669 dbgs() << " visiting " << *Current;
1670 if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1671 dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1672 << ")";
1673 dbgs() << "\n";
1674 });
1675
1676 // Reached TOP.
1677 if (MSSA.isLiveOnEntryDef(Current)) {
1678 LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1679 if (CanOptimize && Current != KillingDef->getDefiningAccess())
1680 // The first clobbering def is... none.
1681 KillingDef->setOptimized(Current);
1682 return std::nullopt;
1683 }
1684
1685 // Cost of a step. Accesses in the same block are more likely to be valid
1686 // candidates for elimination, hence consider them cheaper.
1687 unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1690 if (WalkerStepLimit <= StepCost) {
1691 LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1692 return std::nullopt;
1693 }
1694 WalkerStepLimit -= StepCost;
1695
1696 // Return for MemoryPhis. They cannot be eliminated directly and the
1697 // caller is responsible for traversing them.
1698 if (isa<MemoryPhi>(Current)) {
1699 LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1700 return Current;
1701 }
1702
1703 // Below, check if CurrentDef is a valid candidate to be eliminated by
1704 // KillingDef. If it is not, check the next candidate.
1705 MemoryDef *CurrentDef = cast<MemoryDef>(Current);
1706 Instruction *CurrentI = CurrentDef->getMemoryInst();
1707
1708 if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
1709 CanOptimize = false;
1710 continue;
1711 }
1712
1713 // Before we try to remove anything, check for any extra throwing
1714 // instructions that block us from DSEing
1715 if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
1716 LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1717 return std::nullopt;
1718 }
1719
1720 // Check for anything that looks like it will be a barrier to further
1721 // removal
1722 if (isDSEBarrier(KillingUndObj, CurrentI)) {
1723 LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1724 return std::nullopt;
1725 }
1726
1727 // If Current is known to be on path that reads DefLoc or is a read
1728 // clobber, bail out, as the path is not profitable. We skip this check
1729 // for intrinsic calls, because the code knows how to handle memcpy
1730 // intrinsics.
1731 if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
1732 return std::nullopt;
1733
1734 // Quick check if there are direct uses that are read-clobbers.
1735 if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
1736 if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
1737 return !MSSA.dominates(StartAccess, UseOrDef) &&
1738 isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
1739 return false;
1740 })) {
1741 LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1742 return std::nullopt;
1743 }
1744
1745 // If Current does not have an analyzable write location or is not
1746 // removable, skip it.
1747 CurrentLoc = getLocForWrite(CurrentI);
1748 if (!CurrentLoc || !isRemovable(CurrentI)) {
1749 CanOptimize = false;
1750 continue;
1751 }
1752
1753 // AliasAnalysis does not account for loops. Limit elimination to
1754 // candidates for which we can guarantee they always store to the same
1755 // memory location and not located in different loops.
1756 if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
1757 LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1758 CanOptimize = false;
1759 continue;
1760 }
1761
1762 if (IsMemTerm) {
1763 // If the killing def is a memory terminator (e.g. lifetime.end), check
1764 // the next candidate if the current Current does not write the same
1765 // underlying object as the terminator.
1766 if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
1767 CanOptimize = false;
1768 continue;
1769 }
1770 } else {
1771 int64_t KillingOffset = 0;
1772 int64_t DeadOffset = 0;
1773 auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
1774 KillingOffset, DeadOffset);
1775 if (CanOptimize) {
1776 // CurrentDef is the earliest write clobber of KillingDef. Use it as
1777 // optimized access. Do not optimize if CurrentDef is already the
1778 // defining access of KillingDef.
1779 if (CurrentDef != KillingDef->getDefiningAccess() &&
1780 (OR == OW_Complete || OR == OW_MaybePartial))
1781 KillingDef->setOptimized(CurrentDef);
1782
1783 // Once a may-aliasing def is encountered do not set an optimized
1784 // access.
1785 if (OR != OW_None)
1786 CanOptimize = false;
1787 }
1788
1789 // If Current does not write to the same object as KillingDef, check
1790 // the next candidate.
1791 if (OR == OW_Unknown || OR == OW_None)
1792 continue;
1793 else if (OR == OW_MaybePartial) {
1794 // If KillingDef only partially overwrites Current, check the next
1795 // candidate if the partial step limit is exceeded. This aggressively
1796 // limits the number of candidates for partial store elimination,
1797 // which are less likely to be removable in the end.
1798 if (PartialLimit <= 1) {
1799 WalkerStepLimit -= 1;
1800 LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with "
1801 "next access\n");
1802 continue;
1803 }
1804 PartialLimit -= 1;
1805 }
1806 }
1807 break;
1808 };
1809
1810 // Accesses to objects accessible after the function returns can only be
1811 // eliminated if the access is dead along all paths to the exit. Collect
1812 // the blocks with killing (=completely overwriting MemoryDefs) and check if
1813 // they cover all paths from MaybeDeadAccess to any function exit.
1814 SmallPtrSet<Instruction *, 16> KillingDefs;
1815 KillingDefs.insert(KillingDef->getMemoryInst());
1816 MemoryAccess *MaybeDeadAccess = Current;
1817 MemoryLocation MaybeDeadLoc = *CurrentLoc;
1818 Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
1819 LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1820 << *MaybeDeadI << ")\n");
1821
1823 SmallPtrSet<MemoryAccess *, 32> Visited;
1824 pushMemUses(MaybeDeadAccess, WorkList, Visited);
1825
1826 // Check if DeadDef may be read.
1827 for (unsigned I = 0; I < WorkList.size(); I++) {
1828 MemoryAccess *UseAccess = WorkList[I];
1829
1830 LLVM_DEBUG(dbgs() << " " << *UseAccess);
1831 // Bail out if the number of accesses to check exceeds the scan limit.
1832 if (ScanLimit < (WorkList.size() - I)) {
1833 LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1834 return std::nullopt;
1835 }
1836 --ScanLimit;
1837 NumDomMemDefChecks++;
1838
1839 if (isa<MemoryPhi>(UseAccess)) {
1840 if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
1841 return DT.properlyDominates(KI->getParent(), UseAccess->getBlock());
1842 })) {
1843 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1844 continue;
1845 }
1846 LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1847 pushMemUses(UseAccess, WorkList, Visited);
1848 continue;
1849 }
1850
1851 Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
1852 LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1853
1854 if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
1855 return DT.dominates(KI, UseInst);
1856 })) {
1857 LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1858 continue;
1859 }
1860
1861 // A memory terminator kills all preceeding MemoryDefs and all succeeding
1862 // MemoryAccesses. We do not have to check it's users.
1863 if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1864 LLVM_DEBUG(
1865 dbgs()
1866 << " ... skipping, memterminator invalidates following accesses\n");
1867 continue;
1868 }
1869
1870 if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
1871 LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1872 pushMemUses(UseAccess, WorkList, Visited);
1873 continue;
1874 }
1875
1876 if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
1877 LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1878 return std::nullopt;
1879 }
1880
1881 // Uses which may read the original MemoryDef mean we cannot eliminate the
1882 // original MD. Stop walk.
1883 // If KillingDef is a CallInst with "initializes" attribute, the reads in
1884 // the callee would be dominated by initializations, so it should be safe.
1885 bool IsKillingDefFromInitAttr = false;
1886 if (IsInitializesAttrMemLoc) {
1887 if (KillingI == UseInst &&
1888 KillingUndObj == getUnderlyingObject(MaybeDeadLoc.Ptr))
1889 IsKillingDefFromInitAttr = true;
1890 }
1891
1892 if (isReadClobber(MaybeDeadLoc, UseInst) && !IsKillingDefFromInitAttr) {
1893 LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1894 return std::nullopt;
1895 }
1896
1897 // If this worklist walks back to the original memory access (and the
1898 // pointer is not guarenteed loop invariant) then we cannot assume that a
1899 // store kills itself.
1900 if (MaybeDeadAccess == UseAccess &&
1901 !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
1902 LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1903 return std::nullopt;
1904 }
1905 // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1906 // if it reads the memory location.
1907 // TODO: It would probably be better to check for self-reads before
1908 // calling the function.
1909 if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
1910 LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1911 continue;
1912 }
1913
1914 // Check all uses for MemoryDefs, except for defs completely overwriting
1915 // the original location. Otherwise we have to check uses of *all*
1916 // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
1917 // miss cases like the following
1918 // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1919 // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1920 // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
1921 // (The Use points to the *first* Def it may alias)
1922 // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1923 // stores [0,1]
1924 if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
1925 if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
1926 BasicBlock *MaybeKillingBlock = UseInst->getParent();
1927 if (PostOrderNumbers.find(MaybeKillingBlock)->second <
1928 PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
1929 if (!isInvisibleToCallerAfterRet(KillingUndObj, KillingLoc.Ptr,
1930 KillingLoc.Size)) {
1932 << " ... found killing def " << *UseInst << "\n");
1933 KillingDefs.insert(UseInst);
1934 }
1935 } else {
1937 << " ... found preceeding def " << *UseInst << "\n");
1938 return std::nullopt;
1939 }
1940 } else
1941 pushMemUses(UseDef, WorkList, Visited);
1942 }
1943 }
1944
1945 // For accesses to locations visible after the function returns, make sure
1946 // that the location is dead (=overwritten) along all paths from
1947 // MaybeDeadAccess to the exit.
1948 if (!isInvisibleToCallerAfterRet(KillingUndObj, KillingLoc.Ptr,
1949 KillingLoc.Size)) {
1950 SmallPtrSet<BasicBlock *, 16> KillingBlocks;
1951 for (Instruction *KD : KillingDefs)
1952 KillingBlocks.insert(KD->getParent());
1953 assert(!KillingBlocks.empty() &&
1954 "Expected at least a single killing block");
1955
1956 // Find the common post-dominator of all killing blocks.
1957 BasicBlock *CommonPred = *KillingBlocks.begin();
1958 for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
1959 if (!CommonPred)
1960 break;
1961 CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
1962 }
1963
1964 // If the common post-dominator does not post-dominate MaybeDeadAccess,
1965 // there is a path from MaybeDeadAccess to an exit not going through a
1966 // killing block.
1967 if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
1968 if (!AnyUnreachableExit)
1969 return std::nullopt;
1970
1971 // Fall back to CFG scan starting at all non-unreachable roots if not
1972 // all paths to the exit go through CommonPred.
1973 CommonPred = nullptr;
1974 }
1975
1976 // If CommonPred itself is in the set of killing blocks, we're done.
1977 if (KillingBlocks.count(CommonPred))
1978 return {MaybeDeadAccess};
1979
1980 SetVector<BasicBlock *> WorkList;
1981 // If CommonPred is null, there are multiple exits from the function.
1982 // They all have to be added to the worklist.
1983 if (CommonPred)
1984 WorkList.insert(CommonPred);
1985 else
1986 for (BasicBlock *R : PDT.roots()) {
1987 if (!isa<UnreachableInst>(R->getTerminator()))
1988 WorkList.insert(R);
1989 }
1990
1991 NumCFGTries++;
1992 // Check if all paths starting from an exit node go through one of the
1993 // killing blocks before reaching MaybeDeadAccess.
1994 for (unsigned I = 0; I < WorkList.size(); I++) {
1995 NumCFGChecks++;
1996 BasicBlock *Current = WorkList[I];
1997 if (KillingBlocks.count(Current))
1998 continue;
1999 if (Current == MaybeDeadAccess->getBlock())
2000 return std::nullopt;
2001
2002 // MaybeDeadAccess is reachable from the entry, so we don't have to
2003 // explore unreachable blocks further.
2004 if (!DT.isReachableFromEntry(Current))
2005 continue;
2006
2007 WorkList.insert_range(predecessors(Current));
2008
2009 if (WorkList.size() >= MemorySSAPathCheckLimit)
2010 return std::nullopt;
2011 }
2012 NumCFGSuccess++;
2013 }
2014
2015 // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
2016 // potentially dead.
2017 return {MaybeDeadAccess};
2018}
2019
2020void DSEState::deleteDeadInstruction(Instruction *SI,
2021 SmallPtrSetImpl<MemoryAccess *> *Deleted) {
2022 MemorySSAUpdater Updater(&MSSA);
2023 SmallVector<Instruction *, 32> NowDeadInsts;
2024 NowDeadInsts.push_back(SI);
2025 --NumFastOther;
2026
2027 while (!NowDeadInsts.empty()) {
2028 Instruction *DeadInst = NowDeadInsts.pop_back_val();
2029 ++NumFastOther;
2030
2031 // Try to preserve debug information attached to the dead instruction.
2032 salvageDebugInfo(*DeadInst);
2033 salvageKnowledge(DeadInst);
2034
2035 // Remove the Instruction from MSSA.
2036 MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
2037 bool IsMemDef = MA && isa<MemoryDef>(MA);
2038 if (MA) {
2039 if (IsMemDef) {
2040 auto *MD = cast<MemoryDef>(MA);
2041 SkipStores.insert(MD);
2042 if (Deleted)
2043 Deleted->insert(MD);
2044 if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
2045 if (SI->getValueOperand()->getType()->isPointerTy()) {
2046 const Value *UO = getUnderlyingObject(SI->getValueOperand());
2047 if (CapturedBeforeReturn.erase(UO))
2048 ShouldIterateEndOfFunctionDSE = true;
2049 InvisibleToCallerAfterRet.erase(UO);
2050 InvisibleToCallerAfterRetBounded.erase(UO);
2051 }
2052 }
2053 }
2054
2055 Updater.removeMemoryAccess(MA);
2056 }
2057
2058 auto I = IOLs.find(DeadInst->getParent());
2059 if (I != IOLs.end())
2060 I->second.erase(DeadInst);
2061 // Remove its operands
2062 for (Use &O : DeadInst->operands())
2063 if (Instruction *OpI = dyn_cast<Instruction>(O)) {
2064 O.set(PoisonValue::get(O->getType()));
2065 if (isInstructionTriviallyDead(OpI, &TLI))
2066 NowDeadInsts.push_back(OpI);
2067 }
2068
2069 EA.removeInstruction(DeadInst);
2070 // Remove memory defs directly if they don't produce results, but only
2071 // queue other dead instructions for later removal. They may have been
2072 // used as memory locations that have been cached by BatchAA. Removing
2073 // them here may lead to newly created instructions to be allocated at the
2074 // same address, yielding stale cache entries.
2075 if (IsMemDef && DeadInst->getType()->isVoidTy())
2076 DeadInst->eraseFromParent();
2077 else
2078 ToRemove.push_back(DeadInst);
2079 }
2080}
2081
2082bool DSEState::mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
2083 const Value *KillingUndObj) {
2084 // First see if we can ignore it by using the fact that KillingI is an
2085 // alloca/alloca like object that is not visible to the caller during
2086 // execution of the function.
2087 if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
2088 return false;
2089
2090 if (KillingI->getParent() == DeadI->getParent())
2091 return ThrowingBlocks.count(KillingI->getParent());
2092 return !ThrowingBlocks.empty();
2093}
2094
2095bool DSEState::isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
2096 // If DeadI may throw it acts as a barrier, unless we are to an
2097 // alloca/alloca like object that does not escape.
2098 if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
2099 return true;
2100
2101 // If DeadI is an atomic load/store stronger than monotonic, do not try to
2102 // eliminate/reorder it.
2103 if (DeadI->isAtomic()) {
2104 if (auto *LI = dyn_cast<LoadInst>(DeadI))
2105 return isStrongerThanMonotonic(LI->getOrdering());
2106 if (auto *SI = dyn_cast<StoreInst>(DeadI))
2107 return isStrongerThanMonotonic(SI->getOrdering());
2108 if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
2109 return isStrongerThanMonotonic(ARMW->getOrdering());
2110 if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
2111 return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
2112 isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
2113 llvm_unreachable("other instructions should be skipped in MemorySSA");
2114 }
2115 return false;
2116}
2117
2118bool DSEState::eliminateDeadWritesAtEndOfFunction() {
2119 bool MadeChange = false;
2120 LLVM_DEBUG(
2121 dbgs() << "Trying to eliminate MemoryDefs at the end of the function\n");
2122 do {
2123 ShouldIterateEndOfFunctionDSE = false;
2124 for (MemoryDef *Def : llvm::reverse(MemDefs)) {
2125 if (SkipStores.contains(Def))
2126 continue;
2127
2128 Instruction *DefI = Def->getMemoryInst();
2129 auto DefLoc = getLocForWrite(DefI);
2130 if (!DefLoc || !isRemovable(DefI)) {
2131 LLVM_DEBUG(dbgs() << " ... could not get location for write or "
2132 "instruction not removable.\n");
2133 continue;
2134 }
2135
2136 // NOTE: Currently eliminating writes at the end of a function is
2137 // limited to MemoryDefs with a single underlying object, to save
2138 // compile-time. In practice it appears the case with multiple
2139 // underlying objects is very uncommon. If it turns out to be important,
2140 // we can use getUnderlyingObjects here instead.
2141 const Value *UO = getUnderlyingObject(DefLoc->Ptr);
2142 if (!isInvisibleToCallerAfterRet(UO, DefLoc->Ptr, DefLoc->Size))
2143 continue;
2144
2145 if (isWriteAtEndOfFunction(Def, *DefLoc)) {
2146 // See through pointer-to-pointer bitcasts
2147 LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
2148 "of the function\n");
2150 ++NumFastStores;
2151 MadeChange = true;
2152 }
2153 }
2154 } while (ShouldIterateEndOfFunctionDSE);
2155 return MadeChange;
2156}
2157
2158bool DSEState::eliminateRedundantStoresViaDominatingConditions() {
2159 bool MadeChange = false;
2160 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs whose value being "
2161 "written is implied by a dominating condition\n");
2162
2163 using ConditionInfo = std::pair<Value *, Value *>;
2164 using ScopedHTType = ScopedHashTable<ConditionInfo, Instruction *>;
2165
2166 // We maintain a scoped hash table of the active dominating conditions for a
2167 // given node.
2168 ScopedHTType ActiveConditions;
2169 auto GetDominatingCondition = [&](BasicBlock *BB)
2170 -> std::optional<std::tuple<ConditionInfo, Instruction *, BasicBlock *>> {
2171 auto *BI = dyn_cast<CondBrInst>(BB->getTerminator());
2172 if (!BI)
2173 return std::nullopt;
2174
2175 // In case both blocks are the same, it is not possible to determine
2176 // if optimization is possible. (We would not want to optimize a store
2177 // in the FalseBB if condition is true and vice versa.)
2178 if (BI->getSuccessor(0) == BI->getSuccessor(1))
2179 return std::nullopt;
2180
2181 Instruction *ICmpL;
2182 CmpPredicate Pred;
2183 Value *StorePtr, *StoreVal;
2184 if (!match(BI->getCondition(),
2185 m_c_ICmp(Pred, m_Instruction(ICmpL, m_Load(m_Value(StorePtr))),
2186 m_Value(StoreVal))) ||
2187 !ICmpInst::isEquality(Pred))
2188 return std::nullopt;
2189
2190 // Ensure the replacement is allowed when comparing pointers, as
2191 // the equality compares addresses only, not pointers' provenance.
2192 if (StoreVal->getType()->isPointerTy() &&
2193 !canReplacePointersIfEqual(StoreVal, ICmpL, DL))
2194 return std::nullopt;
2195
2196 unsigned ImpliedSuccIdx = Pred == ICmpInst::ICMP_EQ ? 0 : 1;
2197 BasicBlock *ImpliedSucc = BI->getSuccessor(ImpliedSuccIdx);
2198 return {{ConditionInfo(StorePtr, StoreVal), ICmpL, ImpliedSucc}};
2199 };
2200
2201 auto VisitNode = [&](DomTreeNode *Node, unsigned Depth, auto &Self) -> void {
2203 return;
2204
2205 BasicBlock *BB = Node->getBlock();
2206 // Check for redundant stores against active known conditions.
2207 if (auto *Accesses = MSSA.getBlockDefs(BB)) {
2208 for (auto &Access : make_early_inc_range(*Accesses)) {
2209 auto *Def = dyn_cast<MemoryDef>(&Access);
2210 if (!Def)
2211 continue;
2212
2213 auto *SI = dyn_cast<StoreInst>(Def->getMemoryInst());
2214 if (!SI || !SI->isUnordered())
2215 continue;
2216
2217 Instruction *LI = ActiveConditions.lookup(
2218 {SI->getPointerOperand(), SI->getValueOperand()});
2219 if (!LI)
2220 continue;
2221
2222 // Found a dominating condition that may imply the value being stored.
2223 // Make sure there does not exist any clobbering access between the
2224 // load and the potential redundant store.
2225 MemoryAccess *LoadAccess = MSSA.getMemoryAccess(LI);
2226 MemoryAccess *ClobberingAccess =
2227 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA);
2228 if (MSSA.dominates(ClobberingAccess, LoadAccess)) {
2230 << "Removing No-Op Store:\n DEAD: " << *SI << '\n');
2232 NumRedundantStores++;
2233 MadeChange = true;
2234 }
2235 }
2236 }
2237
2238 // See whether this basic block establishes a dominating condition.
2239 auto MaybeCondition = GetDominatingCondition(BB);
2240
2241 for (DomTreeNode *Child : Node->children()) {
2242 // RAII scope for the active conditions.
2243 ScopedHTType::ScopeTy Scope(ActiveConditions);
2244 if (MaybeCondition) {
2245 const auto &[Cond, LI, ImpliedSucc] = *MaybeCondition;
2246 if (DT.dominates(BasicBlockEdge(BB, ImpliedSucc), Child->getBlock())) {
2247 // Found a condition that holds for this child, dominated by the
2248 // current node via the equality edge. Propagate the condition to
2249 // the children by pushing it onto the table.
2250 ActiveConditions.insert(Cond, LI);
2251 }
2252 }
2253
2254 // Recursively visit the children of this node. Upon destruction, the no
2255 // longer active condition before visiting any sibling nodes is popped
2256 // from the active scope.
2257 Self(Child, Depth + 1, Self);
2258 }
2259 };
2260
2261 // Do a DFS walk of the dom-tree.
2263
2264 return MadeChange;
2265}
2266
2267bool DSEState::tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
2268 Instruction *DefI = Def->getMemoryInst();
2269 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
2270 if (!MemSet)
2271 // TODO: Could handle zero store to small allocation as well.
2272 return false;
2273 Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
2274 if (!StoredConstant || !StoredConstant->isNullValue())
2275 return false;
2276
2277 if (!isRemovable(DefI))
2278 // The memset might be volatile..
2279 return false;
2280
2281 if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
2282 F.hasFnAttribute(Attribute::SanitizeAddress) ||
2283 F.hasFnAttribute(Attribute::SanitizeHWAddress) || F.getName() == "calloc")
2284 return false;
2285 auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
2286 if (!Malloc)
2287 return false;
2288 auto *InnerCallee = Malloc->getCalledFunction();
2289 if (!InnerCallee)
2290 return false;
2291 LibFunc Func = NotLibFunc;
2292 StringRef ZeroedVariantName;
2293 if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
2294 Func != LibFunc_malloc) {
2295 Attribute Attr = Malloc->getFnAttr("alloc-variant-zeroed");
2296 if (!Attr.isValid())
2297 return false;
2298 ZeroedVariantName = Attr.getValueAsString();
2299 if (ZeroedVariantName.empty())
2300 return false;
2301 }
2302
2303 // Gracefully handle malloc with unexpected memory attributes.
2304 auto *MallocDef = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(Malloc));
2305 if (!MallocDef)
2306 return false;
2307
2308 auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
2309 // Check for br(icmp ptr, null), truebb, falsebb) pattern at the end
2310 // of malloc block
2311 auto *MallocBB = Malloc->getParent(), *MemsetBB = Memset->getParent();
2312 if (MallocBB == MemsetBB)
2313 return true;
2314 auto *Ptr = Memset->getArgOperand(0);
2315 auto *TI = MallocBB->getTerminator();
2316 BasicBlock *TrueBB, *FalseBB;
2317 if (!match(TI, m_Br(m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(Ptr),
2318 m_Zero()),
2319 TrueBB, FalseBB)))
2320 return false;
2321 if (MemsetBB != FalseBB)
2322 return false;
2323 return true;
2324 };
2325
2326 if (Malloc->getOperand(0) != MemSet->getLength())
2327 return false;
2328 if (!shouldCreateCalloc(Malloc, MemSet) || !DT.dominates(Malloc, MemSet) ||
2329 !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
2330 return false;
2331 IRBuilder<> IRB(Malloc);
2332 assert(Func == LibFunc_malloc || !ZeroedVariantName.empty());
2333 Value *Calloc = nullptr;
2334 if (!ZeroedVariantName.empty()) {
2335 LLVMContext &Ctx = Malloc->getContext();
2336 AttributeList Attrs = InnerCallee->getAttributes();
2337 AllocFnKind AllocKind =
2338 Attrs.getFnAttr(Attribute::AllocKind).getAllocKind() |
2339 AllocFnKind::Zeroed;
2340 AllocKind &= ~AllocFnKind::Uninitialized;
2341 Attrs =
2342 Attrs.addFnAttribute(Ctx, Attribute::getWithAllocKind(Ctx, AllocKind))
2343 .removeFnAttribute(Ctx, "alloc-variant-zeroed");
2344 FunctionCallee ZeroedVariant = Malloc->getModule()->getOrInsertFunction(
2345 ZeroedVariantName, InnerCallee->getFunctionType(), Attrs);
2346 cast<Function>(ZeroedVariant.getCallee())
2347 ->setCallingConv(Malloc->getCallingConv());
2349 Args.append(Malloc->arg_begin(), Malloc->arg_end());
2350 CallInst *CI = IRB.CreateCall(ZeroedVariant, Args, ZeroedVariantName);
2351 CI->setCallingConv(Malloc->getCallingConv());
2352 Calloc = CI;
2353 } else {
2354 Type *SizeTTy = Malloc->getArgOperand(0)->getType();
2355 Calloc = emitCalloc(ConstantInt::get(SizeTTy, 1), Malloc->getArgOperand(0),
2356 IRB, TLI, Malloc->getType()->getPointerAddressSpace());
2357 }
2358 if (!Calloc)
2359 return false;
2360
2361 MemorySSAUpdater Updater(&MSSA);
2362 auto *NewAccess = Updater.createMemoryAccessAfter(cast<Instruction>(Calloc),
2363 nullptr, MallocDef);
2364 auto *NewAccessMD = cast<MemoryDef>(NewAccess);
2365 Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
2366 Malloc->replaceAllUsesWith(Calloc);
2368 return true;
2369}
2370
2371bool DSEState::storeIsNoop(MemoryDef *Def, const Value *DefUO) {
2372 Instruction *DefI = Def->getMemoryInst();
2373 StoreInst *Store = dyn_cast<StoreInst>(DefI);
2374 MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
2375 Constant *StoredConstant = nullptr;
2376 if (Store)
2377 StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
2378 else if (MemSet)
2379 StoredConstant = dyn_cast<Constant>(MemSet->getValue());
2380 else
2381 return false;
2382
2383 if (!isRemovable(DefI))
2384 return false;
2385
2386 if (StoredConstant) {
2387 Constant *InitC =
2388 getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
2389 // If the clobbering access is LiveOnEntry, no instructions between them
2390 // can modify the memory location.
2391 if (InitC && InitC == StoredConstant)
2392 return MSSA.isLiveOnEntryDef(
2393 MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
2394 }
2395
2396 if (!Store)
2397 return false;
2398
2399 if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
2400 if (LoadI->getPointerOperand() == Store->getOperand(1)) {
2401 // Get the defining access for the load.
2402 auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
2403 // Fast path: the defining accesses are the same.
2404 if (LoadAccess == Def->getDefiningAccess())
2405 return true;
2406
2407 // Look through phi accesses. Recursively scan all phi accesses by
2408 // adding them to a worklist. Bail when we run into a memory def that
2409 // does not match LoadAccess.
2410 SetVector<MemoryAccess *> ToCheck;
2411 MemoryAccess *Current =
2412 MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
2413 // We don't want to bail when we run into the store memory def. But,
2414 // the phi access may point to it. So, pretend like we've already
2415 // checked it.
2416 ToCheck.insert(Def);
2417 ToCheck.insert(Current);
2418 // Start at current (1) to simulate already having checked Def.
2419 for (unsigned I = 1; I < ToCheck.size(); ++I) {
2420 Current = ToCheck[I];
2421 if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
2422 // Check all the operands.
2423 for (auto &Use : PhiAccess->incoming_values())
2424 ToCheck.insert(cast<MemoryAccess>(&Use));
2425 continue;
2426 }
2427
2428 // If we found a memory def, bail. This happens when we have an
2429 // unrelated write in between an otherwise noop store.
2430 assert(isa<MemoryDef>(Current) && "Only MemoryDefs should reach here.");
2431 // TODO: Skip no alias MemoryDefs that have no aliasing reads.
2432 // We are searching for the definition of the store's destination.
2433 // So, if that is the same definition as the load, then this is a
2434 // noop. Otherwise, fail.
2435 if (LoadAccess != Current)
2436 return false;
2437 }
2438 return true;
2439 }
2440 }
2441
2442 return false;
2443}
2444
2445bool DSEState::removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2446 bool Changed = false;
2447 for (auto OI : IOL) {
2448 Instruction *DeadI = OI.first;
2449 MemoryLocation Loc = *getLocForWrite(DeadI);
2450 assert(isRemovable(DeadI) && "Expect only removable instruction");
2451
2452 const Value *Ptr = Loc.Ptr->stripPointerCasts();
2453 int64_t DeadStart = 0;
2454 uint64_t DeadSize = Loc.Size.getValue();
2455 GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
2456 OverlapIntervalsTy &IntervalMap = OI.second;
2457 Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2458 if (IntervalMap.empty())
2459 continue;
2460 Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2461 }
2462 return Changed;
2463}
2464
2465bool DSEState::eliminateRedundantStoresOfExistingValues() {
2466 bool MadeChange = false;
2467 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
2468 "already existing value\n");
2469 for (auto *Def : MemDefs) {
2470 if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
2471 continue;
2472
2473 Instruction *DefInst = Def->getMemoryInst();
2474 auto MaybeDefLoc = getLocForWrite(DefInst);
2475 if (!MaybeDefLoc || !isRemovable(DefInst))
2476 continue;
2477
2478 MemoryDef *UpperDef;
2479 // To conserve compile-time, we avoid walking to the next clobbering def.
2480 // Instead, we just try to get the optimized access, if it exists. DSE
2481 // will try to optimize defs during the earlier traversal.
2482 if (Def->isOptimized())
2483 UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
2484 else
2485 UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
2486 if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
2487 continue;
2488
2489 Instruction *UpperInst = UpperDef->getMemoryInst();
2490 auto IsRedundantStore = [&]() {
2491 // We don't care about differences in call attributes here.
2492 if (DefInst->isIdenticalToWhenDefined(UpperInst,
2493 /*IntersectAttrs=*/true))
2494 return true;
2495 if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
2496 if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
2497 // MemSetInst must have a write location.
2498 auto UpperLoc = getLocForWrite(UpperInst);
2499 if (!UpperLoc)
2500 return false;
2501 int64_t InstWriteOffset = 0;
2502 int64_t DepWriteOffset = 0;
2503 auto OR = isOverwrite(UpperInst, DefInst, *UpperLoc, *MaybeDefLoc,
2504 InstWriteOffset, DepWriteOffset);
2505 Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
2506 return StoredByte && StoredByte == MemSetI->getOperand(1) &&
2507 OR == OW_Complete;
2508 }
2509 }
2510 return false;
2511 };
2512
2513 if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
2514 continue;
2515 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2516 << '\n');
2517 deleteDeadInstruction(DefInst);
2518 NumRedundantStores++;
2519 MadeChange = true;
2520 }
2521 return MadeChange;
2522}
2523
2525DSEState::getInitializesArgMemLoc(const Instruction *I) {
2526 const CallBase *CB = dyn_cast<CallBase>(I);
2527 if (!CB)
2528 return {};
2529
2530 // Collect aliasing arguments and their initializes ranges.
2531 SmallMapVector<Value *, SmallVector<ArgumentInitInfo, 2>, 2> Arguments;
2532 for (unsigned Idx = 0, Count = CB->arg_size(); Idx < Count; ++Idx) {
2533 Value *CurArg = CB->getArgOperand(Idx);
2534 if (!CurArg->getType()->isPointerTy())
2535 continue;
2536
2537 ConstantRangeList Inits;
2538 Attribute InitializesAttr = CB->getParamAttr(Idx, Attribute::Initializes);
2539 // initializes on byval arguments refers to the callee copy, not the
2540 // original memory the caller passed in.
2541 if (InitializesAttr.isValid() && !CB->isByValArgument(Idx))
2542 Inits = InitializesAttr.getValueAsConstantRangeList();
2543
2544 // Check whether "CurArg" could alias with global variables. We require
2545 // either it's function local and isn't captured before or the "CB" only
2546 // accesses arg or inaccessible mem.
2547 if (!Inits.empty() && !CB->onlyAccessesInaccessibleMemOrArgMem() &&
2548 !isFuncLocalAndNotCaptured(CurArg, CB, EA))
2549 Inits = ConstantRangeList();
2550
2551 // We don't perform incorrect DSE on unwind edges in the current function,
2552 // and use the "initializes" attribute to kill dead stores if:
2553 // - The call does not throw exceptions, "CB->doesNotThrow()".
2554 // - Or the callee parameter has "dead_on_unwind" attribute.
2555 // - Or the argument is invisible to caller on unwind, and there are no
2556 // unwind edges from this call in the current function (e.g. `CallInst`).
2557 bool IsDeadOrInvisibleOnUnwind =
2558 CB->paramHasAttr(Idx, Attribute::DeadOnUnwind) ||
2559 (isa<CallInst>(CB) && isInvisibleToCallerOnUnwind(CurArg));
2560 ArgumentInitInfo InitInfo{Idx, IsDeadOrInvisibleOnUnwind, Inits};
2561 bool FoundAliasing = false;
2562 for (auto &[Arg, AliasList] : Arguments) {
2563 auto AAR = BatchAA.alias(MemoryLocation::getBeforeOrAfter(Arg),
2565 if (AAR == AliasResult::NoAlias) {
2566 continue;
2567 } else if (AAR == AliasResult::MustAlias) {
2568 FoundAliasing = true;
2569 AliasList.push_back(InitInfo);
2570 } else {
2571 // For PartialAlias and MayAlias, there is an offset or may be an
2572 // unknown offset between the arguments and we insert an empty init
2573 // range to discard the entire initializes info while intersecting.
2574 FoundAliasing = true;
2575 AliasList.push_back(ArgumentInitInfo{Idx, IsDeadOrInvisibleOnUnwind,
2576 ConstantRangeList()});
2577 }
2578 }
2579 if (!FoundAliasing)
2580 Arguments[CurArg] = {InitInfo};
2581 }
2582
2584 for (const auto &[_, Args] : Arguments) {
2585 auto IntersectedRanges =
2587 if (IntersectedRanges.empty())
2588 continue;
2589
2590 for (const auto &Arg : Args) {
2591 for (const auto &Range : IntersectedRanges) {
2592 int64_t Start = Range.getLower().getSExtValue();
2593 int64_t End = Range.getUpper().getSExtValue();
2594 // For now, we only handle locations starting at offset 0.
2595 if (Start == 0)
2596 Locations.push_back(MemoryLocation(CB->getArgOperand(Arg.Idx),
2597 LocationSize::precise(End - Start),
2598 CB->getAAMetadata()));
2599 }
2600 }
2601 }
2602 return Locations;
2603}
2604
2605std::pair<bool, bool>
2606DSEState::eliminateDeadDefs(const MemoryLocationWrapper &KillingLocWrapper) {
2607 bool Changed = false;
2608 bool DeletedKillingLoc = false;
2609 unsigned ScanLimit = MemorySSAScanLimit;
2610 unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2611 unsigned PartialLimit = MemorySSAPartialStoreLimit;
2612 // Worklist of MemoryAccesses that may be killed by
2613 // "KillingLocWrapper.MemDef".
2614 SmallSetVector<MemoryAccess *, 8> ToCheck;
2615 // Track MemoryAccesses that have been deleted in the loop below, so we can
2616 // skip them. Don't use SkipStores for this, which may contain reused
2617 // MemoryAccess addresses.
2618 SmallPtrSet<MemoryAccess *, 8> Deleted;
2619 [[maybe_unused]] unsigned OrigNumSkipStores = SkipStores.size();
2620 ToCheck.insert(KillingLocWrapper.MemDef->getDefiningAccess());
2621
2622 // Check if MemoryAccesses in the worklist are killed by
2623 // "KillingLocWrapper.MemDef".
2624 for (unsigned I = 0; I < ToCheck.size(); I++) {
2625 MemoryAccess *Current = ToCheck[I];
2626 if (Deleted.contains(Current))
2627 continue;
2628 std::optional<MemoryAccess *> MaybeDeadAccess = getDomMemoryDef(
2629 KillingLocWrapper.MemDef, Current, KillingLocWrapper.MemLoc,
2630 KillingLocWrapper.UnderlyingObject, ScanLimit, WalkerStepLimit,
2631 isMemTerminatorInst(KillingLocWrapper.DefInst), PartialLimit,
2632 KillingLocWrapper.DefByInitializesAttr);
2633
2634 if (!MaybeDeadAccess) {
2635 LLVM_DEBUG(dbgs() << " finished walk\n");
2636 continue;
2637 }
2638 MemoryAccess *DeadAccess = *MaybeDeadAccess;
2639 LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2640 if (isa<MemoryPhi>(DeadAccess)) {
2641 LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2642 for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
2643 MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
2644 BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2645 BasicBlock *PhiBlock = DeadAccess->getBlock();
2646
2647 // We only consider incoming MemoryAccesses that come before the
2648 // MemoryPhi. Otherwise we could discover candidates that do not
2649 // strictly dominate our starting def.
2650 if (PostOrderNumbers[IncomingBlock] > PostOrderNumbers[PhiBlock])
2651 ToCheck.insert(IncomingAccess);
2652 }
2653 continue;
2654 }
2655 // We cannot apply the initializes attribute to DeadAccess/DeadDef.
2656 // It would incorrectly consider a call instruction as redundant store
2657 // and remove this call instruction.
2658 // TODO: this conflates the existence of a MemoryLocation with being able
2659 // to delete the instruction. Fix isRemovable() to consider calls with
2660 // side effects that cannot be removed, e.g. calls with the initializes
2661 // attribute, and remove getLocForInst(ConsiderInitializesAttr = false).
2662 MemoryDefWrapper DeadDefWrapper(
2663 cast<MemoryDef>(DeadAccess),
2664 getLocForInst(cast<MemoryDef>(DeadAccess)->getMemoryInst(),
2665 /*ConsiderInitializesAttr=*/false));
2666 assert(DeadDefWrapper.DefinedLocations.size() == 1);
2667 MemoryLocationWrapper &DeadLocWrapper =
2668 DeadDefWrapper.DefinedLocations.front();
2669 LLVM_DEBUG(dbgs() << " (" << *DeadLocWrapper.DefInst << ")\n");
2670 ToCheck.insert(DeadLocWrapper.MemDef->getDefiningAccess());
2671 NumGetDomMemoryDefPassed++;
2672
2673 if (!DebugCounter::shouldExecute(MemorySSACounter))
2674 continue;
2675 if (isMemTerminatorInst(KillingLocWrapper.DefInst)) {
2676 if (KillingLocWrapper.UnderlyingObject != DeadLocWrapper.UnderlyingObject)
2677 continue;
2678 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2679 << *DeadLocWrapper.DefInst << "\n KILLER: "
2680 << *KillingLocWrapper.DefInst << '\n');
2681 deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
2682 ++NumFastStores;
2683 Changed = true;
2684 } else {
2685 // Check if DeadI overwrites KillingI.
2686 int64_t KillingOffset = 0;
2687 int64_t DeadOffset = 0;
2688 OverwriteResult OR =
2689 isOverwrite(KillingLocWrapper.DefInst, DeadLocWrapper.DefInst,
2690 KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
2691 KillingOffset, DeadOffset);
2692 if (OR == OW_MaybePartial) {
2693 auto &IOL = IOLs[DeadLocWrapper.DefInst->getParent()];
2694 OR = isPartialOverwrite(KillingLocWrapper.MemLoc, DeadLocWrapper.MemLoc,
2695 KillingOffset, DeadOffset,
2696 DeadLocWrapper.DefInst, IOL);
2697 }
2698 if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2699 auto *DeadSI = dyn_cast<StoreInst>(DeadLocWrapper.DefInst);
2700 auto *KillingSI = dyn_cast<StoreInst>(KillingLocWrapper.DefInst);
2701 // We are re-using tryToMergePartialOverlappingStores, which requires
2702 // DeadSI to dominate KillingSI.
2703 // TODO: implement tryToMergeParialOverlappingStores using MemorySSA.
2704 if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
2705 if (Constant *Merged = tryToMergePartialOverlappingStores(
2706 KillingSI, DeadSI, KillingOffset, DeadOffset, DL, BatchAA,
2707 &DT)) {
2708
2709 // Update stored value of earlier store to merged constant.
2710 DeadSI->setOperand(0, Merged);
2711 ++NumModifiedStores;
2712 Changed = true;
2713 DeletedKillingLoc = true;
2714
2715 // Remove killing store and remove any outstanding overlap
2716 // intervals for the updated store.
2717 deleteDeadInstruction(KillingSI, &Deleted);
2718 auto I = IOLs.find(DeadSI->getParent());
2719 if (I != IOLs.end())
2720 I->second.erase(DeadSI);
2721 break;
2722 }
2723 }
2724 }
2725 if (OR == OW_Complete) {
2726 LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
2727 << *DeadLocWrapper.DefInst << "\n KILLER: "
2728 << *KillingLocWrapper.DefInst << '\n');
2729 deleteDeadInstruction(DeadLocWrapper.DefInst, &Deleted);
2730 ++NumFastStores;
2731 Changed = true;
2732 }
2733 }
2734 }
2735
2736 assert(SkipStores.size() - OrigNumSkipStores == Deleted.size() &&
2737 "SkipStores and Deleted out of sync?");
2738
2739 return {Changed, DeletedKillingLoc};
2740}
2741
2742bool DSEState::eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper) {
2743 if (KillingDefWrapper.DefinedLocations.empty()) {
2744 LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2745 << *KillingDefWrapper.DefInst << "\n");
2746 return false;
2747 }
2748
2749 bool MadeChange = false;
2750 for (auto &KillingLocWrapper : KillingDefWrapper.DefinedLocations) {
2751 LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2752 << *KillingLocWrapper.MemDef << " ("
2753 << *KillingLocWrapper.DefInst << ")\n");
2754 auto [Changed, DeletedKillingLoc] = eliminateDeadDefs(KillingLocWrapper);
2755 MadeChange |= Changed;
2756
2757 // Check if the store is a no-op.
2758 if (!DeletedKillingLoc && storeIsNoop(KillingLocWrapper.MemDef,
2759 KillingLocWrapper.UnderlyingObject)) {
2760 LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: "
2761 << *KillingLocWrapper.DefInst << '\n');
2762 deleteDeadInstruction(KillingLocWrapper.DefInst);
2763 NumRedundantStores++;
2764 MadeChange = true;
2765 continue;
2766 }
2767 // Can we form a calloc from a memset/malloc pair?
2768 if (!DeletedKillingLoc &&
2769 tryFoldIntoCalloc(KillingLocWrapper.MemDef,
2770 KillingLocWrapper.UnderlyingObject)) {
2771 LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2772 << " DEAD: " << *KillingLocWrapper.DefInst << '\n');
2773 deleteDeadInstruction(KillingLocWrapper.DefInst);
2774 MadeChange = true;
2775 continue;
2776 }
2777 }
2778 return MadeChange;
2779}
2780
2783 const TargetLibraryInfo &TLI,
2784 const CycleInfo &CI) {
2785 bool MadeChange = false;
2786 DSEState State(F, AA, MSSA, DT, PDT, TLI, CI);
2787 // For each store:
2788 for (unsigned I = 0; I < State.MemDefs.size(); I++) {
2789 MemoryDef *KillingDef = State.MemDefs[I];
2790 if (State.SkipStores.count(KillingDef))
2791 continue;
2792
2793 MemoryDefWrapper KillingDefWrapper(
2794 KillingDef, State.getLocForInst(KillingDef->getMemoryInst(),
2796 MadeChange |= State.eliminateDeadDefs(KillingDefWrapper);
2797 }
2798
2800 for (auto &KV : State.IOLs)
2801 MadeChange |= State.removePartiallyOverlappedStores(KV.second);
2802
2803 MadeChange |= State.eliminateRedundantStoresOfExistingValues();
2804 MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2805 MadeChange |= State.eliminateRedundantStoresViaDominatingConditions();
2806
2807 while (!State.ToRemove.empty()) {
2808 Instruction *DeadInst = State.ToRemove.pop_back_val();
2809 DeadInst->eraseFromParent();
2810 }
2811
2812 return MadeChange;
2813}
2814
2815//===----------------------------------------------------------------------===//
2816// DSE Pass
2817//===----------------------------------------------------------------------===//
2822 MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
2825
2826 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, CI);
2827
2828#ifdef LLVM_ENABLE_STATS
2830 for (auto &I : instructions(F))
2831 NumRemainingStores += isa<StoreInst>(&I);
2832#endif
2833
2834 if (!Changed)
2835 return PreservedAnalyses::all();
2836
2840 return PA;
2841}
2842
2843namespace {
2844
2845/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
2846class DSELegacyPass : public FunctionPass {
2847public:
2848 static char ID; // Pass identification, replacement for typeid
2849
2850 DSELegacyPass() : FunctionPass(ID) {
2852 }
2853
2854 bool runOnFunction(Function &F) override {
2855 if (skipFunction(F))
2856 return false;
2857
2858 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2859 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2860 const TargetLibraryInfo &TLI =
2861 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2862 MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
2863 PostDominatorTree &PDT =
2864 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
2865 CycleInfo &CI = getAnalysis<CycleInfoWrapperPass>().getResult();
2866
2867 bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, CI);
2868
2869#ifdef LLVM_ENABLE_STATS
2871 for (auto &I : instructions(F))
2872 NumRemainingStores += isa<StoreInst>(&I);
2873#endif
2874
2875 return Changed;
2876 }
2877
2878 void getAnalysisUsage(AnalysisUsage &AU) const override {
2879 AU.setPreservesCFG();
2880 AU.addRequired<AAResultsWrapperPass>();
2881 AU.addRequired<TargetLibraryInfoWrapperPass>();
2882 AU.addPreserved<GlobalsAAWrapperPass>();
2883 AU.addRequired<DominatorTreeWrapperPass>();
2884 AU.addPreserved<DominatorTreeWrapperPass>();
2885 AU.addRequired<PostDominatorTreeWrapperPass>();
2886 AU.addRequired<MemorySSAWrapperPass>();
2887 AU.addPreserved<PostDominatorTreeWrapperPass>();
2888 AU.addPreserved<MemorySSAWrapperPass>();
2889 AU.addRequired<CycleInfoWrapperPass>();
2890 AU.addPreserved<CycleInfoWrapperPass>();
2891 AU.addRequired<AssumptionCacheTracker>();
2892 }
2893};
2894
2895} // end anonymous namespace
2896
2897char DSELegacyPass::ID = 0;
2898
2899INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
2900 false)
2910INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
2911 false)
2912
2914 return new DSELegacyPass();
2915}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Lower Kernel Arguments
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file declares an analysis pass that computes CycleInfo for LLVM IR, specialized from GenericCycl...
DXIL Forward Handle Accesses
DXIL Resource Access
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, const TargetLibraryInfo &TLI, const CycleInfo &CI)
MapVector< Instruction *, OverlapIntervalsTy > InstOverlapIntervalsTy
static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller)
static cl::opt< bool > EnableInitializesImprovement("enable-dse-initializes-attr-improvement", cl::init(true), cl::Hidden, cl::desc("Enable the initializes attr improvement in DSE"))
static void shortenAssignment(Instruction *Inst, Value *OriginalDest, uint64_t OldOffsetInBits, uint64_t OldSizeInBits, uint64_t NewSizeInBits, bool IsOverwriteEnd)
static bool isShortenableAtTheEnd(Instruction *I)
Returns true if the end of this instruction can be safely shortened in length.
static bool isNoopIntrinsic(Instruction *I)
static ConstantRangeList getIntersectedInitRangeList(ArrayRef< ArgumentInitInfo > Args, bool CallHasNoUnwindAttr)
static cl::opt< bool > EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE"))
static bool tryToShortenBegin(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
std::map< int64_t, int64_t > OverlapIntervalsTy
static void pushMemUses(MemoryAccess *Acc, SmallVectorImpl< MemoryAccess * > &WorkList, SmallPtrSetImpl< MemoryAccess * > &Visited)
static bool isShortenableAtTheBeginning(Instruction *I)
Returns true if the beginning of this instruction can be safely shortened in length.
static cl::opt< unsigned > MemorySSADefsPerBlockLimit("dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden, cl::desc("The number of MemoryDefs we consider as candidates to eliminated " "other stores per basic block (default = 5000)"))
static Constant * tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, int64_t KillingOffset, int64_t DeadOffset, const DataLayout &DL, BatchAAResults &AA, DominatorTree *DT)
static bool memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI, BatchAAResults &AA, const DataLayout &DL, DominatorTree *DT)
Returns true if the memory which is accessed by the second instruction is not modified between the fi...
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI, const Instruction *DeadI, BatchAAResults &AA)
Check if two instruction are masked stores that completely overwrite one another.
static cl::opt< unsigned > MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5), cl::Hidden, cl::desc("The cost of a step in a different basic " "block than the killing MemoryDef" "(default = 5)"))
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart, uint64_t &DeadSize, int64_t KillingStart, uint64_t KillingSize, bool IsOverwriteEnd)
static cl::opt< unsigned > MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden, cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 150)"))
static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB, EarliestEscapeAnalysis &EA)
static cl::opt< unsigned > MemorySSASameBBStepCost("dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden, cl::desc("The cost of a step in the same basic block as the killing MemoryDef" "(default = 1)"))
static cl::opt< bool > EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking", cl::init(true), cl::Hidden, cl::desc("Enable partial-overwrite tracking in DSE"))
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc, const MemoryLocation &DeadLoc, int64_t KillingOff, int64_t DeadOff, Instruction *DeadI, InstOverlapIntervalsTy &IOL)
Return 'OW_Complete' if a store to the 'KillingLoc' location completely overwrites a store to the 'De...
static cl::opt< unsigned > MemorySSAPartialStoreLimit("dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, cl::desc("The maximum number candidates that only partially overwrite the " "killing MemoryDef to consider" " (default = 5)"))
static std::optional< TypeSize > getPointerSize(const Value *V, const DataLayout &DL, const TargetLibraryInfo &TLI, const Function *F)
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap, int64_t &DeadStart, uint64_t &DeadSize)
static cl::opt< unsigned > MaxDepthRecursion("dse-max-dom-cond-depth", cl::init(1024), cl::Hidden, cl::desc("Max dominator tree recursion depth for eliminating redundant " "stores via dominating conditions"))
static void adjustArgAttributes(AnyMemIntrinsic *Intrinsic, unsigned ArgNo, uint64_t PtrOffset)
Update the attributes given that a memory access is updated (the dereferenced pointer could be moved ...
static cl::opt< unsigned > MemorySSAUpwardsStepLimit("dse-memoryssa-walklimit", cl::init(90), cl::Hidden, cl::desc("The maximum number of steps while walking upwards to find " "MemoryDefs that may be killed (default = 90)"))
static cl::opt< bool > OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden, cl::desc("Allow DSE to optimize memory accesses."))
static bool hasInitializesAttr(Instruction *I)
static cl::opt< unsigned > MemorySSAPathCheckLimit("dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden, cl::desc("The maximum number of blocks to check when trying to prove that " "all paths to an exit go through a killing block (default = 50)"))
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
This file defines the DenseMap class.
early cse Early CSE w MemorySSA
static bool runOnFunction(Function &F, bool PostInlining)
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
#define _
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static void deleteDeadInstruction(Instruction *I)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
static bool VisitNode(MachineDomTreeNode *Node, Register TLSBaseAddrReg)
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
@ NoAlias
The two locations do not alias at all.
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
constexpr int32_t getOffset() const
constexpr bool hasOffset() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
An immutable pass that tracks lazily created AssumptionCache objects.
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:407
LLVM_ABI ArrayRef< ConstantRange > getValueAsConstantRangeList() const
Return the attribute's value as a ConstantRange array.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:261
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Get the attribute of a given kind from a given arg.
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
LLVM_ABI bool onlyAccessesInaccessibleMemOrArgMem() const
Determine if the function may only access memory that is either inaccessible from the IR or pointed t...
bool doesNotThrow() const
Determine if the call cannot unwind.
Value * getArgOperand(unsigned i) const
LLVM_ABI Value * getArgOperandWithAttribute(Attribute::AttrKind Kind) const
If one of the arguments has the specified attribute, returns its operand value.
unsigned arg_size() const
This class represents a list of constant ranges.
bool empty() const
Return true if this list contains no members.
LLVM_ABI ConstantRangeList intersectWith(const ConstantRangeList &CRL) const
Return the range list that results from the intersection of this ConstantRangeList with another Const...
const APInt & getLower() const
Return the lower value for this range.
const APInt & getUpper() const
Return the upper value for this range.
This is an important base class in LLVM.
Definition Constant.h:43
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constant.h:64
Analysis pass which computes a CycleInfo.
Legacy analysis pass which computes a CycleInfo.
static DIAssignID * getDistinct(LLVMContext &Context)
DbgVariableFragmentInfo FragmentInfo
static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
static bool shouldExecute(CounterInfo &Counter)
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:239
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
DomTreeNodeBase< NodeT > * getRootNode()
getRootNode - This returns the entry node for the CFG of the function.
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
iterator_range< root_iterator > roots()
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:314
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Context-sensitive CaptureAnalysis provider, which computes and caches the earliest common dominator c...
void removeInstruction(Instruction *I)
CaptureComponents getCapturesBefore(const Value *Object, const Instruction *I, bool OrAt, bool ReturnCaptures) override
Return how Object may be captured before instruction I, considering only provenance captures.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const BasicBlock & getEntryBlock() const
Definition Function.h:809
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
static GetElementPtrInst * CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Create an "inbounds" getelementptr.
Legacy wrapper pass to provide the GlobalsAAResult object.
bool isEquality() const
Return true if this predicate is either EQ or NE.
LLVM_ABI bool mayThrow(bool IncludePhaseOneUnwind=false) const LLVM_READONLY
Return true if this instruction may throw an exception.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI bool isIdenticalToWhenDefined(const Instruction *I, bool IntersectAttrs=false) const LLVM_READONLY
This is like isIdenticalTo, except that it ignores the SubclassOptionalData flags,...
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
const_iterator begin() const
bool empty() const
empty - Return true when no intervals are mapped.
const_iterator end() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
bool hasValue() const
static LocationSize precise(uint64_t Value)
bool isScalable() const
TypeSize getValue() const
bool isPrecise() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38
Value * getLength() const
Value * getValue() const
BasicBlock * getBlock() const
Definition MemorySSA.h:162
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition MemorySSA.h:371
void setOptimized(MemoryAccess *MA)
Definition MemorySSA.h:392
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDepnedenceResults instance.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
MemoryLocation getWithNewPtr(const Value *NewPtr) const
const Value * Ptr
The address of the start of the location.
static LLVM_ABI MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
static LLVM_ABI std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
static LLVM_ABI MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:922
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition MemorySSA.h:1035
Legacy analysis pass which computes MemorySSA.
Definition MemorySSA.h:975
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
DefsList * getBlockDefs(const BasicBlock *BB) const
Return the list of MemoryDef's and MemoryPhi's for a given basic block.
Definition MemorySSA.h:765
LLVM_ABI MemorySSAWalker * getSkipSelfWalker()
LLVM_ABI bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
LLVM_ABI MemorySSAWalker * getWalker()
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition MemorySSA.h:740
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition MemorySSA.h:260
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
Definition MemorySSA.h:257
PHITransAddr - An address value which tracks and handles phi translation.
LLVM_ABI Value * translateValue(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT, bool MustDominate)
translateValue - PHI translate the current address up the CFG from CurBB to Pred, updating our state ...
LLVM_ABI bool isPotentiallyPHITranslatable() const
isPotentiallyPHITranslatable - If this needs PHI translation, return true if we have some hope of doi...
bool needsPHITranslationFromBlock(BasicBlock *BB) const
needsPHITranslationFromBlock - Return true if moving from the specified BasicBlock to its predecessor...
Value * getAddr() const
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
LLVM_ABI bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
iterator begin() const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this store instruction.
Value * getValueOperand()
bool isUnordered() const
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:307
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:267
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:709
iterator_range< use_iterator > uses()
Definition Value.h:380
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Definition Attributor.h:165
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
CmpClass_match< LHS, RHS, ICmpInst, true > m_c_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
Matches an ICmp with a predicate over LHS and RHS in either order.
auto m_Value()
Match an arbitrary value and ignore it.
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:204
LLVM_ABI bool calculateFragmentIntersect(const DataLayout &DL, const Value *Dest, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const DbgVariableRecord *DVRAssign, std::optional< DIExpression::FragmentInfo > &Result)
Calculate the fragment of the variable in DAI covered from (Dest + SliceOffsetInBits) to to (Dest + S...
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
LLVM_ABI void initializeDSELegacyPassPass(PassRegistry &)
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Constant * getInitialValueOfAllocation(const Value *V, const TargetLibraryInfo *TLI, Type *Ty)
If this is a call to an allocation function that initializes memory to a fixed value,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
@ Uninitialized
Definition Threading.h:60
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
AllocFnKind
Definition Attributes.h:53
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1687
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
LLVM_ABI bool isNoAliasCall(const Value *V)
Return true if this pointer is returned by a noalias function.
DomTreeNodeBase< BasicBlock > DomTreeNode
Definition Dominators.h:94
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
CycleInfo::CycleT Cycle
Definition CycleInfo.h:26
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
Definition Local.cpp:403
LLVM_ABI bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, const TargetLibraryInfo *TLI, ObjectSizeOpts Opts={})
Compute the size of the object pointed by Ptr.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
LLVM_ABI bool canReplacePointersIfEqual(const Value *From, const Value *To, const DataLayout &DL)
Returns true if a pointer value From can be replaced with another pointer value \To if they are deeme...
Definition Loads.cpp:882
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
LLVM_ABI bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
LLVM_ABI Value * emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, const TargetLibraryInfo &TLI, unsigned AddrSpace)
Emit a call to the calloc function.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto post_order(const T &G)
Post-order traversal of a graph.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
uint64_t offsetToAlignment(uint64_t Value, Align Alignment)
Returns the offset to the next integer (mod 2**64) that is greater than or equal to Value and is a mu...
Definition Alignment.h:186
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool salvageKnowledge(Instruction *I, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr)
Calls BuildAssumeFromInst and if the resulting llvm.assume is valid insert if before I.
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this if a call to a free function, return the freed operand.
LLVM_ABI bool isIdentifiedFunctionLocal(const Value *V)
Return true if V is umabigously identified at the function-level.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI FunctionPass * createDeadStoreEliminationPass()
LLVM_ABI Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
auto predecessors(const MachineBasicBlock *BB)
bool capturesAnything(CaptureComponents CC)
Definition ModRef.h:379
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:375
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
Returns true if ao is stronger than other as defined by the AtomicOrdering lattice,...
bool isRefSet(const ModRefInfo MRI)
Definition ModRef.h:52
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Various options to control the behavior of getObjectSize.
bool NullIsUnknownSize
If this is true, null pointers in address space 0 will be treated as though they can't be evaluated.