1//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs various transformations related to eliminating memcpy
10// calls, or transforming sets of stores into memset's.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/CFG.h"
26#include "llvm/Analysis/Loads.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/Dominators.h"
38#include "llvm/IR/Function.h"
40#include "llvm/IR/IRBuilder.h"
41#include "llvm/IR/InstrTypes.h"
42#include "llvm/IR/Instruction.h"
45#include "llvm/IR/Intrinsics.h"
46#include "llvm/IR/LLVMContext.h"
47#include "llvm/IR/Module.h"
48#include "llvm/IR/PassManager.h"
49#include "llvm/IR/Type.h"
50#include "llvm/IR/User.h"
51#include "llvm/IR/Value.h"
53#include "llvm/Support/Debug.h"
57#include <algorithm>
58#include <cassert>
59#include <cstdint>
60#include <optional>
61
62using namespace llvm;
63
64#define DEBUG_TYPE "memcpyopt"
65
67 "enable-memcpyopt-without-libcalls", cl::Hidden,
68 cl::desc("Enable memcpyopt even when libcalls are disabled"));
69
70STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
71STATISTIC(NumMemSetInfer, "Number of memsets inferred");
72STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
73STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
74STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
75STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
76
77namespace {
78
79/// Represents a range of memset'd bytes with the ByteVal value.
80/// This allows us to analyze stores like:
81/// store 0 -> P+1
82/// store 0 -> P+0
83/// store 0 -> P+3
84/// store 0 -> P+2
85/// which sometimes happens with stores to arrays of structs etc. When we see
86/// the first store, we make a range [1, 2). The second store extends the range
87/// to [0, 2). The third makes a new range [3, 4). The fourth store joins the
88/// two ranges into [0, 4) which is memset'able.
89struct MemsetRange {
90 // Start/End - A semi range that describes the span that this range covers.
91 // The range is closed at the start and open at the end: [Start, End).
92 int64_t Start, End;
93
94 /// StartPtr - The getelementptr instruction that points to the start of the
95 /// range.
96 Value *StartPtr;
97
98 /// Alignment - The known alignment of the first store.
99 MaybeAlign Alignment;
100
101 /// TheStores - The actual stores that make up this range.
102 SmallVector<Instruction *, 16> TheStores;
103
104 bool isProfitableToUseMemset(const DataLayout &DL) const;
105};
106
107} // end anonymous namespace
108
109bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
110 // If we found more than 4 stores to merge or 16 bytes, use memset.
111 if (TheStores.size() >= 4 || End - Start >= 16)
112 return true;
113
114 // If there is nothing to merge, don't do anything.
115 if (TheStores.size() < 2)
116 return false;
117
118 // If any of the stores are a memset, then it is always good to extend the
119 // memset.
120 for (Instruction *SI : TheStores)
121 if (!isa<StoreInst>(SI))
122 return true;
123
124 // Assume that the code generator is capable of merging pairs of stores
125 // together if it wants to.
126 if (TheStores.size() == 2)
127 return false;
128
129 // If we have fewer than 8 stores, it can still be worthwhile to do this.
130 // For example, merging 4 i8 stores into an i32 store is useful almost always.
131 // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
132 // memset will be split into 2 32-bit stores anyway) and doing so can
133 // pessimize the llvm optimizer.
134 //
135 // Since we don't have perfect knowledge here, make some assumptions: assume
136 // the maximum GPR width is the same size as the largest legal integer
137 // size. If so, check to see whether we will end up actually reducing the
138 // number of stores used.
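 //
 // For example (illustrative numbers): on a target whose largest legal integer
 // type is 64 bits (MaxIntSize == 8), three stores covering 8 contiguous bytes
 // give NumPointerStores == 1 and NumByteStores == 0, so 3 > 1 and merging is
 // considered profitable; three i8 stores covering only 3 bytes give
 // 0 + 3 == 3, so 3 > 3 fails and the stores are left alone.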
139 unsigned Bytes = unsigned(End - Start);
140 unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
141 if (MaxIntSize == 0)
142 MaxIntSize = 1;
143 unsigned NumPointerStores = Bytes / MaxIntSize;
144
145 // Assume that the remaining bytes, if any, are done a byte at a time.
146 unsigned NumByteStores = Bytes % MaxIntSize;
147
148 // If we will reduce the # stores (according to this heuristic), do the
149 // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
150 // etc.
151 return TheStores.size() > NumPointerStores + NumByteStores;
152}
153
154namespace {
155
156class MemsetRanges {
157 using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
158
159 /// A sorted list of the memset ranges.
160 SmallVector<MemsetRange, 8> Ranges;
161
162 const DataLayout &DL;
163
164public:
165 MemsetRanges(const DataLayout &DL) : DL(DL) {}
166
167 using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
168
169 const_iterator begin() const { return Ranges.begin(); }
170 const_iterator end() const { return Ranges.end(); }
171 bool empty() const { return Ranges.empty(); }
172
173 void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
174 if (auto *SI = dyn_cast<StoreInst>(Inst))
175 addStore(OffsetFromFirst, SI);
176 else
177 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
178 }
179
180 void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
181 TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
182 assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
183 addRange(OffsetFromFirst, StoreSize.getFixedValue(),
184 SI->getPointerOperand(), SI->getAlign(), SI);
185 }
186
187 void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
188 int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
189 addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
190 }
191
192 void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
193 Instruction *Inst);
194};
195
196} // end anonymous namespace
197
198/// Add a new store to the MemsetRanges data structure. This adds a
199/// new range for the specified store at the specified offset, merging into
200/// existing ranges as appropriate.
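/// For example (illustrative): with existing ranges [0, 2) and [4, 6), adding a
/// 3-byte store at offset 2, i.e. [2, 5), first grows [0, 2) to [0, 5) and then
/// absorbs the following [4, 6) range, leaving a single merged range [0, 6).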
201void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
202 MaybeAlign Alignment, Instruction *Inst) {
203 int64_t End = Start + Size;
204
205 range_iterator I = partition_point(
206 Ranges, [=](const MemsetRange &O) { return O.End < Start; });
207
208 // We now know that I == E, in which case we didn't find anything to merge
209 // with, or that Start <= I->End. If End < I->Start or I == E, then we need
210 // to insert a new range. Handle this now.
211 if (I == Ranges.end() || End < I->Start) {
212 MemsetRange &R = *Ranges.insert(I, MemsetRange());
213 R.Start = Start;
214 R.End = End;
215 R.StartPtr = Ptr;
216 R.Alignment = Alignment;
217 R.TheStores.push_back(Inst);
218 return;
219 }
220
221 // This store overlaps with I, add it.
222 I->TheStores.push_back(Inst);
223
224 // At this point, we may have an interval that completely contains our store.
225 // If so, just add it to the interval and return.
226 if (I->Start <= Start && I->End >= End)
227 return;
228
229 // Now we know that Start <= I->End and End >= I->Start so the range overlaps
230 // but is not entirely contained within the range.
231
232 // See if this store extends the start of the range. In this case, it couldn't
233 // possibly cause the range to join the prior range, because otherwise we would
234 // have stopped on *it*.
235 if (Start < I->Start) {
236 I->Start = Start;
237 I->StartPtr = Ptr;
238 I->Alignment = Alignment;
239 }
240
241 // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
242 // is in or right at the end of I), and that End >= I->Start. Extend I out to
243 // End.
244 if (End > I->End) {
245 I->End = End;
246 range_iterator NextI = I;
247 while (++NextI != Ranges.end() && End >= NextI->Start) {
248 // Merge the range in.
249 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
250 if (NextI->End > I->End)
251 I->End = NextI->End;
252 Ranges.erase(NextI);
253 NextI = I;
254 }
255 }
256}
257
258//===----------------------------------------------------------------------===//
259// MemCpyOptLegacyPass Pass
260//===----------------------------------------------------------------------===//
261
262// Check that V is either not accessible by the caller, or unwinding cannot
263// occur between Start and End.
264static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
265 Instruction *End) {
266 assert(Start->getParent() == End->getParent() && "Must be in same block");
267 // Function can't unwind, so it also can't be visible through unwinding.
268 if (Start->getFunction()->doesNotThrow())
269 return false;
270
271 // Object is not visible on unwind.
272 // TODO: Support RequiresNoCaptureBeforeUnwind case.
273 bool RequiresNoCaptureBeforeUnwind;
274 if (!isNotVisibleOnUnwind(getUnderlyingObject(V),
275 RequiresNoCaptureBeforeUnwind) &&
276 !RequiresNoCaptureBeforeUnwind)
277 return false;
278
279 // Check whether there are any unwinding instructions in the range.
280 return any_of(make_range(Start->getIterator(), End->getIterator()),
281 [](const Instruction &I) { return I.mayThrow(); });
282}
283
284void MemCpyOptPass::eraseInstruction(Instruction *I) {
285 MSSAU->removeMemoryAccess(I);
286 I->eraseFromParent();
287}
288
289// Check for mod or ref of Loc between Start and End, excluding both boundaries.
290// Start and End must be in the same block.
291// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
292// intrinsic and store it inside SkippedLifetimeStart.
293static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
294 const MemoryUseOrDef *Start,
295 const MemoryUseOrDef *End,
296 Instruction **SkippedLifetimeStart = nullptr) {
297 assert(Start->getBlock() == End->getBlock() && "Only local supported");
298 for (const MemoryAccess &MA :
299 make_range(++Start->getIterator(), End->getIterator())) {
300 Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
301 if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
302 auto *II = dyn_cast<IntrinsicInst>(I);
303 if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
304 SkippedLifetimeStart && !*SkippedLifetimeStart) {
305 *SkippedLifetimeStart = I;
306 continue;
307 }
308
309 return true;
310 }
311 }
312 return false;
313}
314
315// Check for mod of Loc between Start and End, excluding both boundaries.
316// Start and End can be in different blocks.
317static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
318 MemoryLocation Loc, const MemoryUseOrDef *Start,
319 const MemoryUseOrDef *End) {
320 if (isa<MemoryUse>(End)) {
321 // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
322 // Manually check read accesses between Start and End, if they are in the
323 // same block, for clobbers. Otherwise assume Loc is clobbered.
324 return Start->getBlock() != End->getBlock() ||
325 any_of(
326 make_range(std::next(Start->getIterator()), End->getIterator()),
327 [&AA, Loc](const MemoryAccess &Acc) {
328 if (isa<MemoryUse>(&Acc))
329 return false;
330 Instruction *AccInst =
331 cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
332 return isModSet(AA.getModRefInfo(AccInst, Loc));
333 });
334 }
335
336 // TODO: Only walk until we hit Start.
337 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
338 End->getDefiningAccess(), Loc, AA);
339 return !MSSA->dominates(Clobber, Start);
340}
341
342// Update AA metadata
343static void combineAAMetadata(Instruction *ReplInst, Instruction *I) {
344 // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
345 // handled here, but combineMetadata doesn't support them yet
346 unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
347 LLVMContext::MD_noalias,
348 LLVMContext::MD_invariant_group,
349 LLVMContext::MD_access_group};
350 combineMetadata(ReplInst, I, KnownIDs, true);
351}
352
353/// When scanning forward over instructions, we look for some other patterns to
354/// fold away. In particular, this looks for stores to neighboring locations of
355/// memory. If it sees enough consecutive ones, it attempts to merge them
356/// together into a memcpy/memset.
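///
/// For example (illustrative IR), four adjacent byte stores
/// store i8 0, ptr %p
/// store i8 0, ptr %p1 ; %p1 = getelementptr i8, ptr %p, i64 1
/// store i8 0, ptr %p2 ; %p2 = getelementptr i8, ptr %p, i64 2
/// store i8 0, ptr %p3 ; %p3 = getelementptr i8, ptr %p, i64 3
/// can be collapsed into a single
/// call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 4, i1 false)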
357Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
358 Value *StartPtr,
359 Value *ByteVal) {
360 const DataLayout &DL = StartInst->getModule()->getDataLayout();
361
362 // We can't track scalable types
363 if (auto *SI = dyn_cast<StoreInst>(StartInst))
364 if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
365 return nullptr;
366
367 // Okay, so we now have a single store whose value is byte-splattable. Scan to
368 // find all subsequent stores of the same value at offsets from the same
369 // pointer. Join these together into ranges, so we can decide whether
370 // contiguous blocks are stored.
371 MemsetRanges Ranges(DL);
372
373 BasicBlock::iterator BI(StartInst);
374
375 // Keeps track of the last memory use or def before the insertion point for
376 // the new memset. The new MemoryDef for the inserted memsets will be inserted
377 // after MemInsertPoint.
378 MemoryUseOrDef *MemInsertPoint = nullptr;
379 for (++BI; !BI->isTerminator(); ++BI) {
380 auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
381 MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
382 if (CurrentAcc)
383 MemInsertPoint = CurrentAcc;
384
385 // Calls that only access inaccessible memory do not block merging
386 // accessible stores.
387 if (auto *CB = dyn_cast<CallBase>(BI)) {
388 if (CB->onlyAccessesInaccessibleMemory())
389 continue;
390 }
391
392 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
393 // If the instruction is readnone, ignore it, otherwise bail out. We
394 // don't even allow readonly here because we don't want something like:
395 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
396 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
397 break;
398 continue;
399 }
400
401 if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
402 // If this is a store, see if we can merge it in.
403 if (!NextStore->isSimple())
404 break;
405
406 Value *StoredVal = NextStore->getValueOperand();
407
408 // Don't convert stores of non-integral pointer types to memsets (which
409 // stores integers).
410 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
411 break;
412
413 // We can't track ranges involving scalable types.
414 if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
415 break;
416
417 // Check to see if this stored value is of the same byte-splattable value.
418 Value *StoredByte = isBytewiseValue(StoredVal, DL);
419 if (isa<UndefValue>(ByteVal) && StoredByte)
420 ByteVal = StoredByte;
421 if (ByteVal != StoredByte)
422 break;
423
424 // Check to see if this store is to a constant offset from the start ptr.
425 std::optional<int64_t> Offset =
426 NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
427 if (!Offset)
428 break;
429
430 Ranges.addStore(*Offset, NextStore);
431 } else {
432 auto *MSI = cast<MemSetInst>(BI);
433
434 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
435 !isa<ConstantInt>(MSI->getLength()))
436 break;
437
438 // Check to see if this store is to a constant offset from the start ptr.
439 std::optional<int64_t> Offset =
440 MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
441 if (!Offset)
442 break;
443
444 Ranges.addMemSet(*Offset, MSI);
445 }
446 }
447
448 // If we have no ranges, then we just had a single store with nothing that
449 // could be merged in. This is a very common case of course.
450 if (Ranges.empty())
451 return nullptr;
452
453 // If we had at least one store that could be merged in, add the starting
454 // store as well. We try to avoid this unless there is at least something
455 // interesting as a small compile-time optimization.
456 Ranges.addInst(0, StartInst);
457
458 // If we create any memsets, we put them right before the first instruction
459 // that isn't part of the memset block. This ensures that the memset is
460 // dominated by any addressing instruction needed by the start of the block.
461 IRBuilder<> Builder(&*BI);
462
463 // Now that we have full information about ranges, loop over the ranges and
464 // emit memset's for anything big enough to be worthwhile.
465 Instruction *AMemSet = nullptr;
466 for (const MemsetRange &Range : Ranges) {
467 if (Range.TheStores.size() == 1)
468 continue;
469
470 // If it is profitable to lower this range to memset, do so now.
471 if (!Range.isProfitableToUseMemset(DL))
472 continue;
473
474 // Otherwise, we do want to transform this! Create a new memset.
475 // Get the starting pointer of the block.
476 StartPtr = Range.StartPtr;
477
478 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
479 Range.Alignment);
480 AMemSet->mergeDIAssignID(Range.TheStores);
481
482 LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
483 : Range.TheStores) dbgs()
484 << *SI << '\n';
485 dbgs() << "With: " << *AMemSet << '\n');
486 if (!Range.TheStores.empty())
487 AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
488
489 auto *NewDef = cast<MemoryDef>(
490 MemInsertPoint->getMemoryInst() == &*BI
491 ? MSSAU->createMemoryAccessBefore(AMemSet, nullptr, MemInsertPoint)
492 : MSSAU->createMemoryAccessAfter(AMemSet, nullptr, MemInsertPoint));
493 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
494 MemInsertPoint = NewDef;
495
496 // Zap all the stores.
497 for (Instruction *SI : Range.TheStores)
498 eraseInstruction(SI);
499
500 ++NumMemSetInfer;
501 }
502
503 return AMemSet;
504}
505
506// This method tries to lift a store instruction before position P.
507// It will lift the store and its arguments, plus anything else that
508// may alias with them.
509// The method returns true if it was successful.
510bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
511 // If the store aliases this position, bail out early.
512 MemoryLocation StoreLoc = MemoryLocation::get(SI);
513 if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
514 return false;
515
516 // Keep track of the arguments of all instructions we plan to lift
517 // so we can make sure to lift them as well if appropriate.
518 DenseSet<Instruction *> Args;
519 auto AddArg = [&](Value *Arg) {
520 auto *I = dyn_cast<Instruction>(Arg);
521 if (I && I->getParent() == SI->getParent()) {
522 // Cannot hoist user of P above P
523 if (I == P)
524 return false;
525 Args.insert(I);
526 }
527 return true;
528 };
529 if (!AddArg(SI->getPointerOperand()))
530 return false;
531
532 // Instruction to lift before P.
533 SmallVector<Instruction *, 8> ToLift;
534
535 // Memory locations of lifted instructions.
536 SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
537
538 // Lifted calls.
539 SmallVector<const CallBase *, 8> Calls;
540
541 const MemoryLocation LoadLoc = MemoryLocation::get(LI);
542
543 for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
544 auto *C = &*I;
545
546 // Make sure hoisting does not perform a store that was not guaranteed to
547 // happen.
548 if (!isGuaranteedToTransferExecutionToSuccessor(C))
549 return false;
550
551 bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));
552
553 bool NeedLift = false;
554 if (Args.erase(C))
555 NeedLift = true;
556 else if (MayAlias) {
557 NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
558 return isModOrRefSet(AA->getModRefInfo(C, ML));
559 });
560
561 if (!NeedLift)
562 NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
563 return isModOrRefSet(AA->getModRefInfo(C, Call));
564 });
565 }
566
567 if (!NeedLift)
568 continue;
569
570 if (MayAlias) {
571 // Since LI is implicitly moved downwards past the lifted instructions,
572 // none of them may modify its source.
573 if (isModSet(AA->getModRefInfo(C, LoadLoc)))
574 return false;
575 else if (const auto *Call = dyn_cast<CallBase>(C)) {
576 // If we can't lift this before P, it's game over.
577 if (isModOrRefSet(AA->getModRefInfo(P, Call)))
578 return false;
579
580 Calls.push_back(Call);
581 } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
582 // If we can't lift this before P, it's game over.
583 auto ML = MemoryLocation::get(C);
584 if (isModOrRefSet(AA->getModRefInfo(P, ML)))
585 return false;
586
587 MemLocs.push_back(ML);
588 } else
589 // We don't know how to lift this instruction.
590 return false;
591 }
592
593 ToLift.push_back(C);
594 for (Value *Op : C->operands())
595 if (!AddArg(Op))
596 return false;
597 }
598
599 // Find MSSA insertion point. Normally P will always have a corresponding
600 // memory access before which we can insert. However, with non-standard AA
601 // pipelines, there may be a mismatch between AA and MSSA, in which case we
602 // will scan for a memory access before P. In either case, we know for sure
603 // that at least the load will have a memory access.
604 // TODO: Simplify this once P will be determined by MSSA, in which case the
605 // discrepancy can no longer occur.
606 MemoryUseOrDef *MemInsertPoint = nullptr;
607 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
608 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
609 } else {
610 const Instruction *ConstP = P;
611 for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
612 ++LI->getReverseIterator())) {
613 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
614 MemInsertPoint = MA;
615 break;
616 }
617 }
618 }
619
620 // We made it, we need to lift.
621 for (auto *I : llvm::reverse(ToLift)) {
622 LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
623 I->moveBefore(P);
624 assert(MemInsertPoint && "Must have found insert point");
625 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
626 MSSAU->moveAfter(MA, MemInsertPoint);
627 MemInsertPoint = MA;
628 }
629 }
630
631 return true;
632}
633
634bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
635 const DataLayout &DL,
636 BasicBlock::iterator &BBI) {
637 if (!LI->isSimple() || !LI->hasOneUse() || LI->getParent() != SI->getParent())
638 return false;
639
640 auto *T = LI->getType();
641 // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
642 // the corresponding libcalls are not available.
643 // TODO: We should really distinguish between libcall availability and
644 // our ability to introduce intrinsics.
645 if (T->isAggregateType() &&
646 (EnableMemCpyOptWithoutLibcalls ||
647 (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
648 MemoryLocation LoadLoc = MemoryLocation::get(LI);
649
650 // We use alias analysis to check if an instruction may store to
651 // the memory we load from in between the load and the store. If
652 // such an instruction is found, we try to promote there instead
653 // of at the store position.
654 // TODO: Can use MSSA for this.
655 Instruction *P = SI;
656 for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
657 if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
658 P = &I;
659 break;
660 }
661 }
662
663 // We found an instruction that may write to the loaded memory.
664 // We can try to promote at this position instead of the store
665 // position if nothing aliases the store memory after this and the store
666 // destination is not in the range.
667 if (P && P != SI) {
668 if (!moveUp(SI, P, LI))
669 P = nullptr;
670 }
671
672 // If a valid insertion position is found, then we can promote
673 // the load/store pair to a memcpy.
674 if (P) {
675 // If we load from memory that may alias the memory we store to,
676 // memmove must be used to preserve semantics. If not, memcpy can
677 // be used. Also, if we load from constant memory, memcpy can be used
678 // as the constant memory won't be modified.
679 bool UseMemMove = false;
680 if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
681 UseMemMove = true;
682
683 IRBuilder<> Builder(P);
684 Value *Size =
685 Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
686 Instruction *M;
687 if (UseMemMove)
688 M = Builder.CreateMemMove(SI->getPointerOperand(), SI->getAlign(),
689 LI->getPointerOperand(), LI->getAlign(),
690 Size);
691 else
692 M = Builder.CreateMemCpy(SI->getPointerOperand(), SI->getAlign(),
693 LI->getPointerOperand(), LI->getAlign(), Size);
694 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
695
696 LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M
697 << "\n");
698
699 auto *LastDef =
700 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
701 auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
702 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
703
704 eraseInstruction(SI);
705 eraseInstruction(LI);
706 ++NumMemCpyInstr;
707
708 // Make sure we do not invalidate the iterator.
709 BBI = M->getIterator();
710 return true;
711 }
712 }
713
714 // Detect cases where we're performing call slot forwarding, but
715 // happen to be using a load-store pair to implement it, rather than
716 // a memcpy.
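 // For example (illustrative IR):
 // call void @foo(ptr sret(%T) %tmp)
 // %v = load %T, ptr %tmp
 // store %T %v, ptr %dst
 // may be rewritten so that the call writes its result directly into %dst,
 // making the load/store pair dead.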
717 BatchAAResults BAA(*AA);
718 auto GetCall = [&]() -> CallInst * {
719 // We defer this expensive clobber walk until the cheap checks
720 // have been done on the source inside performCallSlotOptzn.
721 if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
722 MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
723 return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
724 return nullptr;
725 };
726
727 bool Changed = performCallSlotOptzn(
728 LI, SI, SI->getPointerOperand()->stripPointerCasts(),
729 LI->getPointerOperand()->stripPointerCasts(),
730 DL.getTypeStoreSize(SI->getOperand(0)->getType()),
731 std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
732 if (Changed) {
733 eraseInstruction(SI);
734 eraseInstruction(LI);
735 ++NumMemCpyInstr;
736 return true;
737 }
738
739 // If this is a load-store pair from a stack slot to a stack slot, we
740 // might be able to perform the stack-move optimization just as we do for
741 // memcpys from an alloca to an alloca.
742 if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
743 if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
744 if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
745 DL.getTypeStoreSize(T), BAA)) {
746 // Avoid invalidating the iterator.
747 BBI = SI->getNextNonDebugInstruction()->getIterator();
748 eraseInstruction(SI);
749 eraseInstruction(LI);
750 ++NumMemCpyInstr;
751 return true;
752 }
753 }
754 }
755
756 return false;
757}
758
759bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
760 if (!SI->isSimple())
761 return false;
762
763 // Avoid merging nontemporal stores since the resulting
764 // memcpy/memset would not be able to preserve the nontemporal hint.
765 // In theory we could teach how to propagate the !nontemporal metadata to
766 // memset calls. However, that change would force the backend to
767 // conservatively expand !nontemporal memset calls back to sequences of
768 // store instructions (effectively undoing the merging).
769 if (SI->getMetadata(LLVMContext::MD_nontemporal))
770 return false;
771
772 const DataLayout &DL = SI->getModule()->getDataLayout();
773
774 Value *StoredVal = SI->getValueOperand();
775
776 // Not all the transforms below are correct for non-integral pointers, bail
777 // until we've audited the individual pieces.
778 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
779 return false;
780
781 // Load to store forwarding can be interpreted as memcpy.
782 if (auto *LI = dyn_cast<LoadInst>(StoredVal))
783 return processStoreOfLoad(SI, LI, DL, BBI);
784
785 // The following code creates memset intrinsics out of thin air. Don't do
786 // this if the corresponding libfunc is not available.
787 // TODO: We should really distinguish between libcall availability and
788 // our ability to introduce intrinsics.
789 if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
790 return false;
791
792 // There are two cases that are interesting for this code to handle: memcpy
793 // and memset. Right now we only handle memset.
794
795 // Ensure that the value being stored is something that can be memset a byte
796 // at a time, like "0" or "-1" of any width, as well as things like
797 // 0xA0A0A0A0 and 0.0.
798 auto *V = SI->getOperand(0);
799 if (Value *ByteVal = isBytewiseValue(V, DL)) {
800 if (Instruction *I =
801 tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
802 BBI = I->getIterator(); // Don't invalidate iterator.
803 return true;
804 }
805
806 // If we have an aggregate, we try to promote it to memset regardless
807 // of opportunity for merging as it can expose optimization opportunities
808 // in subsequent passes.
809 auto *T = V->getType();
810 if (T->isAggregateType()) {
811 uint64_t Size = DL.getTypeStoreSize(T);
812 IRBuilder<> Builder(SI);
813 auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
814 SI->getAlign());
815 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
816
817 LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
818
819 // The newly inserted memset is immediately overwritten by the original
820 // store, so we do not need to rename uses.
821 auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
822 auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
823 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
824
825 eraseInstruction(SI);
826 NumMemSetInfer++;
827
828 // Make sure we do not invalidate the iterator.
829 BBI = M->getIterator();
830 return true;
831 }
832 }
833
834 return false;
835}
836
837bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
838 // See if there is another memset or store neighboring this memset which
839 // allows us to widen out the memset to do a single larger store.
840 if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
841 if (Instruction *I =
842 tryMergingIntoMemset(MSI, MSI->getDest(), MSI->getValue())) {
843 BBI = I->getIterator(); // Don't invalidate iterator.
844 return true;
845 }
846 return false;
847}
848
849/// Takes a memcpy and a call that it depends on,
850/// and checks for the possibility of a call slot optimization by having
851/// the call write its result directly into the destination of the memcpy.
852bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
853 Instruction *cpyStore, Value *cpyDest,
854 Value *cpySrc, TypeSize cpySize,
855 Align cpyDestAlign,
856 BatchAAResults &BAA,
857 std::function<CallInst *()> GetC) {
858 // The general transformation to keep in mind is
859 //
860 // call @func(..., src, ...)
861 // memcpy(dest, src, ...)
862 //
863 // ->
864 //
865 // memcpy(dest, src, ...)
866 // call @func(..., dest, ...)
867 //
868 // Since moving the memcpy is technically awkward, we additionally check that
869 // src only holds uninitialized values at the moment of the call, meaning that
870 // the memcpy can be discarded rather than moved.
871
872 // We can't optimize scalable types.
873 if (cpySize.isScalable())
874 return false;
875
876 // Require that src be an alloca. This simplifies the reasoning considerably.
877 auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
878 if (!srcAlloca)
879 return false;
880
881 ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
882 if (!srcArraySize)
883 return false;
884
885 const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
886 TypeSize SrcAllocaSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType());
887 // We can't optimize scalable types.
888 if (SrcAllocaSize.isScalable())
889 return false;
890 uint64_t srcSize = SrcAllocaSize * srcArraySize->getZExtValue();
891
892 if (cpySize < srcSize)
893 return false;
894
895 CallInst *C = GetC();
896 if (!C)
897 return false;
898
899 // Lifetime marks shouldn't be operated on.
900 if (Function *F = C->getCalledFunction())
901 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
902 return false;
903
904 if (C->getParent() != cpyStore->getParent()) {
905 LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
906 return false;
907 }
908
909 MemoryLocation DestLoc =
910 isa<StoreInst>(cpyStore)
911 ? MemoryLocation::get(cpyStore)
912 : MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
913
914 // Check that nothing touches the dest of the copy between
915 // the call and the store/memcpy.
916 Instruction *SkippedLifetimeStart = nullptr;
917 if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
918 MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
919 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
920 return false;
921 }
922
923 // If we need to move a lifetime.start above the call, make sure that we can
924 // actually do so. If the argument is bitcasted for example, we would have to
925 // move the bitcast as well, which we don't handle.
926 if (SkippedLifetimeStart) {
927 auto *LifetimeArg =
928 dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
929 if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
930 C->comesBefore(LifetimeArg))
931 return false;
932 }
933
934 // Check that storing to the first srcSize bytes of dest will not cause a
935 // trap or data race.
936 bool ExplicitlyDereferenceableOnly;
937 if (!isWritableObject(getUnderlyingObject(cpyDest),
938 ExplicitlyDereferenceableOnly) ||
939 !isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
940 DL, C, AC, DT)) {
941 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
942 return false;
943 }
944
945 // Make sure that nothing can observe cpyDest being written early. There are
946 // a number of cases to consider:
947 // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
948 // the transform.
949 // 2. C itself may not access cpyDest (prior to the transform). This is
950 // checked further below.
951 // 3. If cpyDest is accessible to the caller of this function (potentially
952 // captured and not based on an alloca), we need to ensure that we cannot
953 // unwind between C and cpyStore. This is checked here.
954 // 4. If cpyDest is potentially captured, there may be accesses to it from
955 // another thread. In this case, we need to check that cpyStore is
956 // guaranteed to be executed if C is. As it is a non-atomic access, it
957 // renders accesses from other threads undefined.
958 // TODO: This is currently not checked.
959 if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
960 LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
961 return false;
962 }
963
964 // Check that dest points to memory that is at least as aligned as src.
965 Align srcAlign = srcAlloca->getAlign();
966 bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
967 // If dest is not aligned enough and we can't increase its alignment then
968 // bail out.
969 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
970 LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
971 return false;
972 }
973
974 // Check that src is not accessed except via the call and the memcpy. This
975 // guarantees that it holds only undefined values when passed in (so the final
976 // memcpy can be dropped), that it is not read or written between the call and
977 // the memcpy, and that writing beyond the end of it is undefined.
978 SmallVector<User *, 8> srcUseList(srcAlloca->users());
979 while (!srcUseList.empty()) {
980 User *U = srcUseList.pop_back_val();
981
982 if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
983 append_range(srcUseList, U->users());
984 continue;
985 }
986 if (const auto *G = dyn_cast<GetElementPtrInst>(U)) {
987 if (!G->hasAllZeroIndices())
988 return false;
989
990 append_range(srcUseList, U->users());
991 continue;
992 }
993 if (const auto *IT = dyn_cast<IntrinsicInst>(U))
994 if (IT->isLifetimeStartOrEnd())
995 continue;
996
997 if (U != C && U != cpyLoad)
998 return false;
999 }
1000
1001 // Check whether src is captured by the called function, in which case there
1002 // may be further indirect uses of src.
1003 bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
1004 return U->stripPointerCasts() == cpySrc &&
1005 !C->doesNotCapture(C->getArgOperandNo(&U));
1006 });
1007
1008 // If src is captured, then check whether there are any potential uses of
1009 // src through the captured pointer before the lifetime of src ends, either
1010 // due to a lifetime.end or a return from the function.
1011 if (SrcIsCaptured) {
1012 // Check that dest is not captured before/at the call. We have already
1013 // checked that src is not captured before it. If either had been captured,
1014 // then the call might be comparing the argument against the captured dest
1015 // or src pointer.
1016 Value *DestObj = getUnderlyingObject(cpyDest);
1017 if (!isIdentifiedFunctionLocal(DestObj) ||
1018 PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
1019 /* StoreCaptures */ true, C, DT,
1020 /* IncludeI */ true))
1021 return false;
1022
1023 MemoryLocation SrcLoc =
1024 MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
1025 for (Instruction &I :
1026 make_range(++C->getIterator(), C->getParent()->end())) {
1027 // Lifetime of srcAlloca ends at lifetime.end.
1028 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1029 if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
1030 II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
1031 cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
1032 break;
1033 }
1034
1035 // Lifetime of srcAlloca ends at return.
1036 if (isa<ReturnInst>(&I))
1037 break;
1038
1039 // Ignore the direct read of src in the load.
1040 if (&I == cpyLoad)
1041 continue;
1042
1043 // Check whether this instruction may mod/ref src through the captured
1044 // pointer (we have already checked for any direct mod/refs in the loop above).
1045 // Also bail if we hit a terminator, as we don't want to scan into other
1046 // blocks.
1047 if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
1048 return false;
1049 }
1050 }
1051
1052 // Since we're changing the parameter to the callsite, we need to make sure
1053 // that what would be the new parameter dominates the callsite.
1054 bool NeedMoveGEP = false;
1055 if (!DT->dominates(cpyDest, C)) {
1056 // Support moving a constant index GEP before the call.
1057 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1058 if (GEP && GEP->hasAllConstantIndices() &&
1059 DT->dominates(GEP->getPointerOperand(), C))
1060 NeedMoveGEP = true;
1061 else
1062 return false;
1063 }
1064
1065 // In addition to knowing that the call does not access src in some
1066 // unexpected manner, for example via a global, which we deduce from
1067 // the use analysis, we also need to know that it does not sneakily
1068 // access dest. We rely on AA to figure this out for us.
1069 MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
1070 ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
1071 // If necessary, perform additional analysis.
1072 if (isModOrRefSet(MR))
1073 MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
1074 if (isModOrRefSet(MR))
1075 return false;
1076
1077 // We can't create address space casts here because we don't know if they're
1078 // safe for the target.
1079 if (cpySrc->getType() != cpyDest->getType())
1080 return false;
1081 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1082 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1083 cpySrc->getType() != C->getArgOperand(ArgI)->getType())
1084 return false;
1085
1086 // All the checks have passed, so do the transformation.
1087 bool changedArgument = false;
1088 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1089 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1090 changedArgument = true;
1091 C->setArgOperand(ArgI, cpyDest);
1092 }
1093
1094 if (!changedArgument)
1095 return false;
1096
1097 // If the destination wasn't sufficiently aligned then increase its alignment.
1098 if (!isDestSufficientlyAligned) {
1099 assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1100 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1101 }
1102
1103 if (NeedMoveGEP) {
1104 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1105 GEP->moveBefore(C);
1106 }
1107
1108 if (SkippedLifetimeStart) {
1109 SkippedLifetimeStart->moveBefore(C);
1110 MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
1111 MSSA->getMemoryAccess(C));
1112 }
1113
1114 combineAAMetadata(C, cpyLoad);
1115 if (cpyLoad != cpyStore)
1116 combineAAMetadata(C, cpyStore);
1117
1118 ++NumCallSlot;
1119 return true;
1120}
1121
1122/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1123/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
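///
/// For example (illustrative):
/// memcpy(tmp <- src, n)
/// ... (no writes to src here) ...
/// memcpy(dst <- tmp, n)
/// can be rewritten so the second copy reads directly from 'src', i.e.
/// memcpy(dst <- src, n), emitted as a memmove if 'dst' may alias 'src'.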
1124bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1125 MemCpyInst *MDep,
1126 BatchAAResults &BAA) {
1127 // We can only transforms memcpy's where the dest of one is the source of the
1128 // other.
1129 if (M->getSource() != MDep->getDest() || MDep->isVolatile())
1130 return false;
1131
1132 // If dep instruction is reading from our current input, then it is a noop
1133 // transfer and substituting the input won't change this instruction. Just
1134 // ignore the input and let someone else zap MDep. This handles cases like:
1135 // memcpy(a <- a)
1136 // memcpy(b <- a)
1137 if (M->getSource() == MDep->getSource())
1138 return false;
1139
1140 // Second, the length of the memcpy's must be the same, or the preceding one
1141 // must be larger than the following one.
1142 if (MDep->getLength() != M->getLength()) {
1143 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1144 auto *MLen = dyn_cast<ConstantInt>(M->getLength());
1145 if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
1146 return false;
1147 }
1148
1149 // Verify that the copied-from memory doesn't change in between the two
1150 // transfers. For example, in:
1151 // memcpy(a <- b)
1152 // *b = 42;
1153 // memcpy(c <- a)
1154 // It would be invalid to transform the second memcpy into memcpy(c <- b).
1155 //
1156 // TODO: If the code between M and MDep is transparent to the destination "c",
1157 // then we could still perform the xform by moving M up to the first memcpy.
1158 // TODO: It would be sufficient to check the MDep source up to the memcpy
1159 // size of M, rather than MDep.
1160 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1161 MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
1162 return false;
1163
1164 // If the dest of the second might alias the source of the first, then the
1165 // source and dest might overlap. In addition, if the source of the first
1166 // points to constant memory, they won't overlap by definition. Otherwise, we
1167 // still want to eliminate the intermediate value, but we have to generate a
1168 // memmove instead of memcpy.
1169 bool UseMemMove = false;
1170 if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep)))) {
1171 // Don't convert llvm.memcpy.inline into memmove because memmove can be
1172 // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
1173 // there is no inline version of llvm.memmove)
1174 if (isa<MemCpyInlineInst>(M))
1175 return false;
1176 UseMemMove = true;
1177 }
1178
1179 // If all checks passed, then we can transform M.
1180 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1181 << *MDep << '\n'
1182 << *M << '\n');
1183
1184 // TODO: Is this worth it if we're creating a less aligned memcpy? For
1185 // example we could be moving from movaps -> movq on x86.
1186 IRBuilder<> Builder(M);
1187 Instruction *NewM;
1188 if (UseMemMove)
1189 NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
1190 MDep->getRawSource(), MDep->getSourceAlign(),
1191 M->getLength(), M->isVolatile());
1192 else if (isa<MemCpyInlineInst>(M)) {
1193 // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1194 // never allowed since that would allow the latter to be lowered as a call
1195 // to an external function.
1196 NewM = Builder.CreateMemCpyInline(
1197 M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
1198 MDep->getSourceAlign(), M->getLength(), M->isVolatile());
1199 } else
1200 NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
1201 MDep->getRawSource(), MDep->getSourceAlign(),
1202 M->getLength(), M->isVolatile());
1203 NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
1204
1205 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
1206 auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1207 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1208 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1209
1210 // Remove the instruction we're replacing.
1211 eraseInstruction(M);
1212 ++NumMemCpyInstr;
1213 return true;
1214}
1215
1216/// We've found that the (upward scanning) memory dependence of \p MemCpy is
1217/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1218/// weren't copied over by \p MemCpy.
1219///
1220/// In other words, transform:
1221/// \code
1222/// memset(dst, c, dst_size);
1223/// ...
1224/// memcpy(dst, src, src_size);
1225/// \endcode
1226/// into:
1227/// \code
1228/// ...
1229/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1230/// memcpy(dst, src, src_size);
1231/// \endcode
1232///
1233/// The memset is sunk to just before the memcpy to ensure that src_size is
1234/// present when emitting the simplified memset.
1235bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1236 MemSetInst *MemSet,
1237 BatchAAResults &BAA) {
1238 // We can only transform memset/memcpy with the same destination.
1239 if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1240 return false;
1241
1242 // Check that src and dst of the memcpy aren't the same. While memcpy
1243 // operands cannot partially overlap, exact equality is allowed.
1244 if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1245 return false;
1246
1247 // We know that dst up to src_size is not written. We now need to make sure
1248 // that dst up to dst_size is not accessed. (If we did not move the memset,
1249 // checking for reads would be sufficient.)
1250 if (accessedBetween(BAA, MemoryLocation::getForDest(MemSet),
1251 MSSA->getMemoryAccess(MemSet),
1252 MSSA->getMemoryAccess(MemCpy)))
1253 return false;
1254
1255 // Use the same i8* dest as the memcpy, killing the memset dest if different.
1256 Value *Dest = MemCpy->getRawDest();
1257 Value *DestSize = MemSet->getLength();
1258 Value *SrcSize = MemCpy->getLength();
1259
1260 if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1261 return false;
1262
1263 // If the sizes are the same, simply drop the memset instead of generating
1264 // a replacement with zero size.
1265 if (DestSize == SrcSize) {
1266 eraseInstruction(MemSet);
1267 return true;
1268 }
1269
1270 // By default, create an unaligned memset.
1271 Align Alignment = Align(1);
1272 // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1273 // of the sum.
1274 const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
1275 MemCpy->getDestAlign().valueOrOne());
1276 if (DestAlign > 1)
1277 if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1278 Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
1279
1280 IRBuilder<> Builder(MemCpy);
1281
1282 // Preserve the debug location of the old memset for the code emitted here
1283 // related to the new memset. This is correct according to the rules in
1284 // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an
1285 // instruction location", given that we move the memset within the basic
1286 // block.
1287 assert(MemSet->getParent() == MemCpy->getParent() &&
1288 "Preserving debug location based on moving memset within BB.");
1289 Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());
1290
1291 // If the sizes have different types, zext the smaller one.
1292 if (DestSize->getType() != SrcSize->getType()) {
1293 if (DestSize->getType()->getIntegerBitWidth() >
1294 SrcSize->getType()->getIntegerBitWidth())
1295 SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1296 else
1297 DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1298 }
1299
1300 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1301 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1302 Value *MemsetLen = Builder.CreateSelect(
1303 Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1304 Instruction *NewMemSet =
1305 Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
1306 MemSet->getOperand(1), MemsetLen, Alignment);
1307
1308 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
1309 "MemCpy must be a MemoryDef");
1310 // The new memset is inserted before the memcpy, and it is known that the
1311 // memcpy's defining access is the memset about to be removed.
1312 auto *LastDef =
1313 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1314 auto *NewAccess =
1315 MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef);
1316 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1317
1318 eraseInstruction(MemSet);
1319 return true;
1320}
1321
1322/// Determine whether the instruction has undefined content for the given Size,
1323/// either because it was freshly alloca'd or started its lifetime.
1324static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1325 MemoryDef *Def, Value *Size) {
1326 if (MSSA->isLiveOnEntryDef(Def))
1327 return isa<AllocaInst>(getUnderlyingObject(V));
1328
1329 if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1330 if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1331 auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1332
1333 if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1334 if (AA.isMustAlias(V, II->getArgOperand(1)) &&
1335 LTSize->getZExtValue() >= CSize->getZExtValue())
1336 return true;
1337 }
1338
1339 // If the lifetime.start covers a whole alloca (as it almost always
1340 // does) and we're querying a pointer based on that alloca, then we know
1341 // the memory is definitely undef, regardless of how exactly we alias.
1342 // The size also doesn't matter, as an out-of-bounds access would be UB.
1343 if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
1344 if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1345 const DataLayout &DL = Alloca->getModule()->getDataLayout();
1346 if (std::optional<TypeSize> AllocaSize =
1347 Alloca->getAllocationSize(DL))
1348 if (*AllocaSize == LTSize->getValue())
1349 return true;
1350 }
1351 }
1352 }
1353 }
1354
1355 return false;
1356}
1357
1358/// Transform memcpy to memset when its source was just memset.
1359/// In other words, turn:
1360/// \code
1361/// memset(dst1, c, dst1_size);
1362/// memcpy(dst2, dst1, dst2_size);
1363/// \endcode
1364/// into:
1365/// \code
1366/// memset(dst1, c, dst1_size);
1367/// memset(dst2, c, dst2_size);
1368/// \endcode
1369/// When dst2_size <= dst1_size.
1370bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1371 MemSetInst *MemSet,
1372 BatchAAResults &BAA) {
1373 // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
1374 // memcpying from the same address. Otherwise it is hard to reason about.
1375 if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1376 return false;
1377
1378 Value *MemSetSize = MemSet->getLength();
1379 Value *CopySize = MemCpy->getLength();
1380
1381 if (MemSetSize != CopySize) {
1382 // Make sure the memcpy doesn't read any more than what the memset wrote.
1383 // Don't worry about sizes larger than i64.
1384
1385 // A known memset size is required.
1386 auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1387 if (!CMemSetSize)
1388 return false;
1389
1390 // A known memcpy size is also required.
1391 auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1392 if (!CCopySize)
1393 return false;
1394 if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1395 // If the memcpy is larger than the memset, but the memory was undef prior
1396 // to the memset, we can just ignore the tail. Technically we're only
1397 // interested in the bytes from MemSetSize..CopySize here, but as we can't
1398 // easily represent this location, we use the full 0..CopySize range.
1399 MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1400 bool CanReduceSize = false;
1401 MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1402 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1403 MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
1404 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1405 if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
1406 CanReduceSize = true;
1407
1408 if (!CanReduceSize)
1409 return false;
1410 CopySize = MemSetSize;
1411 }
1412 }
1413
1414 IRBuilder<> Builder(MemCpy);
1415 Instruction *NewM =
1416 Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1417 CopySize, MemCpy->getDestAlign());
1418 auto *LastDef =
1419 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1420 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1421 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1422
1423 return true;
1424}
1425
1426// Attempts to optimize the pattern whereby memory is copied from an alloca to
1427// another alloca, where the two allocas don't have conflicting mod/ref. If
1428// successful, the two allocas can be merged into one and the transfer can be
1429// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
1430// move operations in that language.
1431//
1432// Once we determine that the optimization is safe to perform, we replace all
1433// uses of the destination alloca with the source alloca. We also "shrink wrap"
1434// the lifetime markers of the single merged alloca to before the first use
1435// and after the last use. Note that the "shrink wrapping" procedure is a safe
1436// transformation only because we restrict the scope of this optimization to
1437// allocas that aren't captured.
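//
// For example (illustrative), a full-size copy between two non-captured static
// allocas:
// %src = alloca %T
// %dst = alloca %T
// ... initialize %src ...
// call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 <size of %T>, i1 false)
// can be removed entirely by replacing all uses of %dst with %src, provided the
// mod/ref checks below succeed.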
1438bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1439 AllocaInst *DestAlloca,
1440 AllocaInst *SrcAlloca, TypeSize Size,
1441 BatchAAResults &BAA) {
1442 LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
1443 << *Store << "\n");
1444
1445 // Make sure the two allocas are in the same address space.
1446 if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
1447 LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
1448 return false;
1449 }
1450
1451 // Check that the copy covers the full allocation and has a static size.
1452 const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
1453 std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
1454 if (!SrcSize || Size != *SrcSize) {
1455 LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
1456 return false;
1457 }
1458 std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
1459 if (!DestSize || Size != *DestSize) {
1460 LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
1461 return false;
1462 }
1463
1464 if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
1465 return false;
1466
1467 // Check that src and dest are never captured, unescaped allocas. Also
1468 // find the nearest common dominator and postdominator for all users in
1469 // order to shrink wrap the lifetimes, and instructions with noalias metadata
1470 // to remove them.
1471
1472 SmallVector<Instruction *, 4> LifetimeMarkers;
1473 SmallSet<Instruction *, 4> NoAliasInstrs;
1474 bool SrcNotDom = false;
1475
1476 // Recursively track the users and check whether any modifying aliases exist.
1477 auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
1478 bool CanBeNull, CanBeFreed;
1479 return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
1480 };
1481
1482 auto CaptureTrackingWithModRef =
1483 [&](Instruction *AI,
1484 function_ref<bool(Instruction *)> ModRefCallback) -> bool {
1485 SmallVector<Instruction *, 8> Worklist;
1486 Worklist.push_back(AI);
1487 unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
1488 Worklist.reserve(MaxUsesToExplore);
1489 SmallSet<const Use *, 20> Visited;
1490 while (!Worklist.empty()) {
1491 Instruction *I = Worklist.back();
1492 Worklist.pop_back();
1493 for (const Use &U : I->uses()) {
1494 auto *UI = cast<Instruction>(U.getUser());
1495 // If any use that isn't dominated by SrcAlloca exists, we move src
1496 // alloca to the entry before the transformation.
1497 if (!DT->dominates(SrcAlloca, UI))
1498 SrcNotDom = true;
1499
1500 if (Visited.size() >= MaxUsesToExplore) {
1501 LLVM_DEBUG(
1502 dbgs()
1503 << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
1504 return false;
1505 }
1506 if (!Visited.insert(&U).second)
1507 continue;
1508 switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
1509 case UseCaptureKind::MAY_CAPTURE:
1510 return false;
1511 case UseCaptureKind::PASSTHROUGH:
1512 // Instructions cannot have non-instruction users.
1513 Worklist.push_back(UI);
1514 continue;
1515 case UseCaptureKind::NO_CAPTURE: {
1516 if (UI->isLifetimeStartOrEnd()) {
1517 // We note the locations of these intrinsic calls so that we can
1518 // delete them later if the optimization succeeds; this is safe
1519 // because both llvm.lifetime.start and llvm.lifetime.end intrinsics
1520 // practically fill all the bytes of the alloca with an undefined
1521 // value, although conceptually they only mark it as alive/dead.
1522 int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
1523 if (Size < 0 || Size == DestSize) {
1524 LifetimeMarkers.push_back(UI);
1525 continue;
1526 }
1527 }
1528 if (UI->hasMetadata(LLVMContext::MD_noalias))
1529 NoAliasInstrs.insert(UI);
1530 if (!ModRefCallback(UI))
1531 return false;
1532 }
1533 }
1534 }
1535 }
1536 return true;
1537 };
1538
1539 // Check that dest has no Mod/Ref from the alloca to the Store, except for
1540 // full-size lifetime intrinsics, and collect the Mod/Ref instructions for
1541 // the reachability check.
1542 ModRefInfo DestModRef = ModRefInfo::NoModRef;
1543 MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
1544 SmallVector<BasicBlock *, 8> ReachabilityWorklist;
1545 auto DestModRefCallback = [&](Instruction *UI) -> bool {
1546 // We don't care about the store itself.
1547 if (UI == Store)
1548 return true;
1549 ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
1550 DestModRef |= Res;
1551 if (isModOrRefSet(Res)) {
1552 // Instruction reachability checks.
1553 // FIXME: an Instruction-level isPotentiallyReachableFromMany in
1554 // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
1555 if (UI->getParent() == Store->getParent()) {
1556 // The same block case is special because it's the only time we're
1557 // looking within a single block to see which instruction comes first.
1558 // Once we start looking at multiple blocks, the first instruction of
1559 // the block is reachable, so we only need to determine reachability
1560 // between whole blocks.
1561 BasicBlock *BB = UI->getParent();
1562
1563 // If A comes before B, then B is definitively reachable from A.
1564 if (UI->comesBefore(Store))
1565 return false;
1566
1567 // If the user's parent block is entry, no predecessor exists.
1568 if (BB->isEntryBlock())
1569 return true;
1570
1571 // Otherwise, continue doing the normal per-BB CFG walk.
1572 ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
1573 } else {
1574 ReachabilityWorklist.push_back(UI->getParent());
1575 }
1576 }
1577 return true;
1578 };
1579
1580 if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
1581 return false;
1582 // Bailout if Dest may have any ModRef before Store.
1583 if (!ReachabilityWorklist.empty() &&
1584 isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
1585 nullptr, DT, nullptr))
1586 return false;
1587
1588 // Check that, from after the Load to the end of the BB,
1589 // - if the dest has any Mod, src has no Ref, and
1590 // - if the dest has any Ref, src has no Mod except full-sized lifetimes.
1591 MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
1592
1593 auto SrcModRefCallback = [&](Instruction *UI) -> bool {
1594 // Any ModRef post-dominated by Load doesn't matter, also Load and Store
1595 // themselves can be ignored.
1596 if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
1597 return true;
1598 ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
1599 if ((isModSet(DestModRef) && isRefSet(Res)) ||
1600 (isRefSet(DestModRef) && isModSet(Res)))
1601 return false;
1602
1603 return true;
1604 };
1605
1606 if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
1607 return false;
1608
1609 // We can do the transformation. First, move the SrcAlloca to the start of the
1610 // BB.
1611 if (SrcNotDom)
1612 SrcAlloca->moveBefore(*SrcAlloca->getParent(),
1613 SrcAlloca->getParent()->getFirstInsertionPt());
1614 // Align the allocas appropriately.
1615 SrcAlloca->setAlignment(
1616 std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
1617
1618 // Merge the two allocas.
1619 DestAlloca->replaceAllUsesWith(SrcAlloca);
1620 eraseInstruction(DestAlloca);
1621
1622 // Drop metadata on the source alloca.
1623 SrcAlloca->dropUnknownNonDebugMetadata();
1624
1625 // TODO: Reconstruct merged lifetime markers.
1626 // Remove all other lifetime markers, if the original lifetime intrinsics
1627 // exist.
1628 if (!LifetimeMarkers.empty()) {
1629 for (Instruction *I : LifetimeMarkers)
1630 eraseInstruction(I);
1631 }
1632
1633 // As this transformation can cause memory accesses that didn't previously
1634 // alias to begin to alias one another, we remove !noalias metadata from any
1635 // uses of either alloca. This is conservative, but more precision doesn't
1636 // seem worthwhile right now.
1637 for (Instruction *I : NoAliasInstrs)
1638 I->setMetadata(LLVMContext::MD_noalias, nullptr);
1639
1640 LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
1641 NumStackMove++;
1642 return true;
1643}
1644
1645static bool isZeroSize(Value *Size) {
1646 if (auto *I = dyn_cast<Instruction>(Size))
1647 if (auto *Res = simplifyInstruction(I, I->getModule()->getDataLayout()))
1648 Size = Res;
1649 // Treat undef/poison size like zero.
1650 if (auto *C = dyn_cast<Constant>(Size))
1651 return isa<UndefValue>(C) || C->isNullValue();
1652 return false;
1653}
1654
1655/// Perform simplification of memcpy's. If we have memcpy A
1656/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1657/// B to be a memcpy from X to Z (or potentially a memmove, depending on
1658/// circumstances). This allows later passes to remove the first memcpy
1659/// altogether.
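// Illustrative sketch (editorial addition; %X, %Y, %Z and the size are
// hypothetical):
//
//   call void @llvm.memcpy.p0.p0.i64(ptr %Y, ptr %X, i64 64, i1 false)   ; A
//   call void @llvm.memcpy.p0.p0.i64(ptr %Z, ptr %Y, i64 64, i1 false)   ; B
//
// becomes
//
//   call void @llvm.memcpy.p0.p0.i64(ptr %Y, ptr %X, i64 64, i1 false)   ; A
//   call void @llvm.memcpy.p0.p0.i64(ptr %Z, ptr %X, i64 64, i1 false)   ; B'
//
// after which A is dead (and removable by DSE) if %Y has no other readers.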
1660bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1661 // We can only optimize non-volatile memcpy's.
1662 if (M->isVolatile())
1663 return false;
1664
1665 // If the source and destination of the memcpy are the same, then zap it.
1666 if (M->getSource() == M->getDest()) {
1667 ++BBI;
1668 eraseInstruction(M);
1669 return true;
1670 }
1671
1672 // If the size is zero, remove the memcpy. This also prevents infinite loops
1673 // in processMemSetMemCpyDependence, which is a no-op for zero-length memcpys.
1674 if (isZeroSize(M->getLength())) {
1675 ++BBI;
1676 eraseInstruction(M);
1677 return true;
1678 }
1679
1680 MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1681 if (!MA)
1682 // Degenerate case: memcpy marked as not accessing memory.
1683 return false;
1684
1685 // If copying from a constant, try to turn the memcpy into a memset.
1686 if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
1687 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1688 if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1689 M->getModule()->getDataLayout())) {
1690 IRBuilder<> Builder(M);
1691 Instruction *NewM = Builder.CreateMemSet(
1692 M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
1693 auto *LastDef = cast<MemoryDef>(MA);
1694 auto *NewAccess =
1695 MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1696 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1697
1698 eraseInstruction(M);
1699 ++NumCpyToSet;
1700 return true;
1701 }
1702
1703 BatchAAResults BAA(*AA);
1704 // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
1705 MemoryAccess *AnyClobber = MA->getDefiningAccess();
1706 MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1707 const MemoryAccess *DestClobber =
1708 MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
1709
1710 // Try to turn a partially redundant memset + memcpy into
1711 // smaller memset + memcpy. We don't need the memcpy size for this.
1712 // The memcpy must post-dom the memset, so limit this to the same basic
1713 // block. A non-local generalization is likely not worthwhile.
1714 if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1715 if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1716 if (DestClobber->getBlock() == M->getParent())
1717 if (processMemSetMemCpyDependence(M, MDep, BAA))
1718 return true;
1719
1720 MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1721 AnyClobber, MemoryLocation::getForSource(M), BAA);
1722
1723 // There are five possible optimizations we can do for memcpy:
1724 // a) memcpy-memcpy xform which exposes redundancy for DSE.
1725 // b) call-memcpy xform for return slot optimization.
1726 // c) memcpy from freshly alloca'd space or space that has just started
1727 // its lifetime copies undefined data, and we can therefore eliminate
1728 // the memcpy in favor of the data that was already at the destination.
1729 // d) memcpy from a just-memset'd source can be turned into memset.
1730 // e) elimination of memcpy via stack-move optimization.
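// Illustrative sketch of (b) (editorial addition; @compute, %tmp, %dst and
// the type are hypothetical): a call that writes its result into %tmp,
// followed by a full-size copy into %dst,
//
//   call void @compute(ptr sret(%T) %tmp)
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %tmp, i64 <sizeof %T>, i1 false)
//
// can have the call rewritten to write into %dst directly, which makes the
// memcpy removable.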
1731 if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1732 if (Instruction *MI = MD->getMemoryInst()) {
1733 if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1734 if (auto *C = dyn_cast<CallInst>(MI)) {
1735 if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1736 TypeSize::getFixed(CopySize->getZExtValue()),
1737 M->getDestAlign().valueOrOne(), BAA,
1738 [C]() -> CallInst * { return C; })) {
1739 LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1740 << " call: " << *C << "\n"
1741 << " memcpy: " << *M << "\n");
1742 eraseInstruction(M);
1743 ++NumMemCpyInstr;
1744 return true;
1745 }
1746 }
1747 }
1748 if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1749 if (processMemCpyMemCpyDependence(M, MDep, BAA))
1750 return true;
1751 if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1752 if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
1753 LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1754 eraseInstruction(M);
1755 ++NumCpyToSet;
1756 return true;
1757 }
1758 }
1759 }
1760
1761 if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
1762 LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1763 eraseInstruction(M);
1764 ++NumMemCpyInstr;
1765 return true;
1766 }
1767 }
1768
1769 // If the transfer is from a stack slot to a stack slot, then we may be able
1770 // to perform the stack-move optimization. See the comments in
1771 // performStackMoveOptzn() for more details.
1772 auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
1773 if (!DestAlloca)
1774 return false;
1775 auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
1776 if (!SrcAlloca)
1777 return false;
1778 ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
1779 if (Len == nullptr)
1780 return false;
1781 if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
1782 TypeSize::getFixed(Len->getZExtValue()), BAA)) {
1783 // Avoid invalidating the iterator.
1784 BBI = M->getNextNonDebugInstruction()->getIterator();
1785 eraseInstruction(M);
1786 ++NumMemCpyInstr;
1787 return true;
1788 }
1789
1790 return false;
1791}
1792
1793/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1794/// not to alias.
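// Illustrative sketch (editorial addition; operands hypothetical): when the
// memmove cannot modify its own source (the ranges cannot overlap harmfully),
//
//   call void @llvm.memmove.p0.p0.i64(ptr %dst, ptr %src, i64 32, i1 false)
//
// is rewritten in place into
//
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 32, i1 false)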
1795bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1796 // See if the source could potentially be modified by this memmove.
1797 if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
1798 return false;
1799
1800 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1801 << "\n");
1802
1803 // If not, then we know we can transform this.
1804 Type *ArgTys[3] = {M->getRawDest()->getType(), M->getRawSource()->getType(),
1805 M->getLength()->getType()};
1806 M->setCalledFunction(
1807 Intrinsic::getDeclaration(M->getModule(), Intrinsic::memcpy, ArgTys));
1808
1809 // For MemorySSA nothing really changes (except that memcpy may imply stricter
1810 // aliasing guarantees).
1811
1812 ++NumMoveToCpy;
1813 return true;
1814}
1815
1816/// This is called on every byval argument in call sites.
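// Illustrative sketch (editorial addition; @foo, %tmp, %src and the type are
// hypothetical): the forwarding below changes
//
//   call void @llvm.memcpy.p0.p0.i64(ptr %tmp, ptr %src, i64 16, i1 false)
//   call void @foo(ptr byval(%T) align 4 %tmp)
//
// so that the call passes %src instead of %tmp, provided %src is not written
// between the memcpy and the call and its alignment is (or can be made)
// sufficient. The memcpy itself is left for later passes to clean up.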
1817bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1818 const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1819 // Find out what feeds this byval argument.
1820 Value *ByValArg = CB.getArgOperand(ArgNo);
1821 Type *ByValTy = CB.getParamByValType(ArgNo);
1822 TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1823 MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1824 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1825 if (!CallAccess)
1826 return false;
1827 MemCpyInst *MDep = nullptr;
1828 BatchAAResults BAA(*AA);
1829 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1830 CallAccess->getDefiningAccess(), Loc, BAA);
1831 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1832 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1833
1834 // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1835 // a memcpy, see if we can byval from the source of the memcpy instead of the
1836 // result.
1837 if (!MDep || MDep->isVolatile() ||
1838 ByValArg->stripPointerCasts() != MDep->getDest())
1839 return false;
1840
1841 // The length of the memcpy must be larger than or equal to the size of the byval.
1842 auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1843 if (!C1 || !TypeSize::isKnownGE(
1844 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1845 return false;
1846
1847 // Get the alignment of the byval. If the call doesn't specify the alignment,
1848 // then it is some target specific value that we can't know.
1849 MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1850 if (!ByValAlign)
1851 return false;
1852
1853 // If it is greater than the memcpy's source alignment, then we check to see
1854 // if we can force the source of the memcpy to the alignment we need. If we fail, we bail out.
1855 MaybeAlign MemDepAlign = MDep->getSourceAlign();
1856 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1857 getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1858 DT) < *ByValAlign)
1859 return false;
1860
1861 // The type of the memcpy source must match the byval argument
1862 if (MDep->getSource()->getType() != ByValArg->getType())
1863 return false;
1864
1865 // Verify that the copied-from memory doesn't change in between the memcpy and
1866 // the byval call.
1867 // memcpy(a <- b)
1868 // *b = 42;
1869 // foo(*a)
1870 // It would be invalid to rewrite the call as foo(*b).
1871 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1872 MSSA->getMemoryAccess(MDep), CallAccess))
1873 return false;
1874
1875 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1876 << " " << *MDep << "\n"
1877 << " " << CB << "\n");
1878
1879 // Otherwise we're good! Update the byval argument.
1880 combineAAMetadata(&CB, MDep);
1881 CB.setArgOperand(ArgNo, MDep->getSource());
1882 ++NumMemCpyInstr;
1883 return true;
1884}
1885
1886/// This is called on memcpy dest pointer arguments attributed as immutable
1887/// during call. Try to use memcpy source directly if all of the following
1888/// conditions are satisfied.
1889/// 1. The memcpy dst is neither modified during the call nor captured by the
1890/// call. (if readonly, noalias, nocapture attributes on call-site.)
1891/// 2. The memcpy dst is an alloca with known alignment & size.
1892/// 2-1. The memcpy length == the alloca size which ensures that the new
1893/// pointer is dereferenceable for the required range
1894/// 2-2. The src pointer has alignment >= the alloca alignment or can be
1895 /// enforced to be so.
1896 /// 3. The memcpy dst and src are not modified between the memcpy and the call.
1897/// (if MSSA clobber check is safe.)
1898/// 4. The memcpy src is not modified during the call. (ModRef check shows no
1899/// Mod.)
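// Illustrative sketch (editorial addition; @foo, %tmp, %src are hypothetical):
// when conditions 1-4 hold,
//
//   %tmp = alloca [32 x i8], align 8
//   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %tmp, ptr align 8 %src,
//                                    i64 32, i1 false)
//   call void @foo(ptr noalias nocapture readonly %tmp)
//
// the call is rewritten to take the memcpy source directly:
//
//   call void @foo(ptr noalias nocapture readonly %src)
//
// leaving the memcpy and %tmp for later passes to remove.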
1900bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
1901 // 1. Ensure passed argument is immutable during call.
1902 if (!(CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
1903 CB.paramHasAttr(ArgNo, Attribute::NoCapture)))
1904 return false;
1905 const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1906 Value *ImmutArg = CB.getArgOperand(ArgNo);
1907
1908 // 2. Check that the arg is an alloca.
1909 // TODO: Even if the arg traces back through branches to multiple allocas, we
1910 // can remove the memcpy if all their alignments can be enforced to the source alignment.
1911 auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
1912 if (!AI)
1913 return false;
1914
1915 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
1916 // Can't handle unknown size alloca.
1917 // (e.g. Variable Length Array, Scalable Vector)
1918 if (!AllocaSize || AllocaSize->isScalable())
1919 return false;
1920 MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
1921 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1922 if (!CallAccess)
1923 return false;
1924
1925 MemCpyInst *MDep = nullptr;
1926 BatchAAResults BAA(*AA);
1927 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1928 CallAccess->getDefiningAccess(), Loc, BAA);
1929 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1930 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1931
1932 // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
1933 // a memcpy, check that the arg equals the memcpy dest.
1934 if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
1935 return false;
1936
1937 // The type of the memcpy source must match the immut argument
1938 if (MDep->getSource()->getType() != ImmutArg->getType())
1939 return false;
1940
1941 // 2-1. The length of the memcpy must be equal to the size of the alloca.
1942 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1943 if (!MDepLen || AllocaSize != MDepLen->getValue())
1944 return false;
1945
1946 // 2-2. The memcpy source alignment must be larger than or equal to the
1947 // alloca's alignment. If not, we check to see if we can force the source of
1948 // the memcpy to the alignment we need. If we fail, we bail out.
1949 Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
1950 Align AllocaAlign = AI->getAlign();
1951 if (MemDepAlign < AllocaAlign &&
1952 getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
1953 DT) < AllocaAlign)
1954 return false;
1955
1956 // 3. Verify that the source doesn't change in between the memcpy and
1957 // the call.
1958 // memcpy(a <- b)
1959 // *b = 42;
1960 // foo(*a)
1961 // It would be invalid to rewrite the call as foo(*b).
1962 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1963 MSSA->getMemoryAccess(MDep), CallAccess))
1964 return false;
1965
1966 // 4. The memcpy src must not be modified during the call.
1967 if (isModSet(BAA.getModRefInfo(&CB, MemoryLocation::getForSource(MDep))))
1968 return false;
1969
1970 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
1971 << " " << *MDep << "\n"
1972 << " " << CB << "\n");
1973
1974 // Otherwise we're good! Update the immut argument.
1975 combineAAMetadata(&CB, MDep);
1976 CB.setArgOperand(ArgNo, MDep->getSource());
1977 ++NumMemCpyInstr;
1978 return true;
1979}
1980
1981/// Executes one iteration of MemCpyOptPass.
1982bool MemCpyOptPass::iterateOnFunction(Function &F) {
1983 bool MadeChange = false;
1984
1985 // Walk all instructions in the function.
1986 for (BasicBlock &BB : F) {
1987 // Skip unreachable blocks. For example processStore assumes that an
1988 // instruction in a BB can't be dominated by a later instruction in the
1989 // same BB (which is a scenario that can happen for an unreachable BB that
1990 // has itself as a predecessor).
1991 if (!DT->isReachableFromEntry(&BB))
1992 continue;
1993
1994 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
1995 // Avoid invalidating the iterator.
1996 Instruction *I = &*BI++;
1997
1998 bool RepeatInstruction = false;
1999
2000 if (auto *SI = dyn_cast<StoreInst>(I))
2001 MadeChange |= processStore(SI, BI);
2002 else if (auto *M = dyn_cast<MemSetInst>(I))
2003 RepeatInstruction = processMemSet(M, BI);
2004 else if (auto *M = dyn_cast<MemCpyInst>(I))
2005 RepeatInstruction = processMemCpy(M, BI);
2006 else if (auto *M = dyn_cast<MemMoveInst>(I))
2007 RepeatInstruction = processMemMove(M);
2008 else if (auto *CB = dyn_cast<CallBase>(I)) {
2009 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
2010 if (CB->isByValArgument(i))
2011 MadeChange |= processByValArgument(*CB, i);
2012 else if (CB->onlyReadsMemory(i))
2013 MadeChange |= processImmutArgument(*CB, i);
2014 }
2015 }
2016
2017 // Reprocess the instruction if desired.
2018 if (RepeatInstruction) {
2019 if (BI != BB.begin())
2020 --BI;
2021 MadeChange = true;
2022 }
2023 }
2024 }
2025
2026 return MadeChange;
2027}
2028
2029 PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
2030 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2031 auto *AA = &AM.getResult<AAManager>(F);
2032 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
2033 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
2034 auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
2035 auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
2036
2037 bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
2038 if (!MadeChange)
2039 return PreservedAnalyses::all();
2040
2041 PreservedAnalyses PA;
2042 PA.preserveSet<CFGAnalyses>();
2043 PA.preserve<MemorySSAAnalysis>();
2044 return PA;
2045}
2046
2047 bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
2048 AliasAnalysis *AA_, AssumptionCache *AC_,
2049 DominatorTree *DT_, PostDominatorTree *PDT_,
2050 MemorySSA *MSSA_) {
2051 bool MadeChange = false;
2052 TLI = TLI_;
2053 AA = AA_;
2054 AC = AC_;
2055 DT = DT_;
2056 PDT = PDT_;
2057 MSSA = MSSA_;
2058 MemorySSAUpdater MSSAU_(MSSA_);
2059 MSSAU = &MSSAU_;
2060
2061 while (true) {
2062 if (!iterateOnFunction(F))
2063 break;
2064 MadeChange = true;
2065 }
2066
2067 if (VerifyMemorySSA)
2068 MSSA_->verifyMemorySSA();
2069
2070 return MadeChange;
2071}