LLVM 23.0.0git
MemCpyOptimizer.cpp
Go to the documentation of this file.
1//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs various transformations related to eliminating memcpy
10// calls, or transforming sets of stores into memset's.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/ScopeExit.h"
19#include "llvm/ADT/Statistic.h"
23#include "llvm/Analysis/CFG.h"
27#include "llvm/Analysis/Loads.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
38#include "llvm/IR/Dominators.h"
39#include "llvm/IR/Function.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/LLVMContext.h"
48#include "llvm/IR/Module.h"
49#include "llvm/IR/PassManager.h"
51#include "llvm/IR/Type.h"
52#include "llvm/IR/User.h"
53#include "llvm/IR/Value.h"
55#include "llvm/Support/Debug.h"
58#include <algorithm>
59#include <cassert>
60#include <cstdint>
61#include <optional>
62
63using namespace llvm;
64
65#define DEBUG_TYPE "memcpyopt"
66
68 "enable-memcpyopt-without-libcalls", cl::Hidden,
69 cl::desc("Enable memcpyopt even when libcalls are disabled"));
70
71STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
72STATISTIC(NumMemMoveInstr, "Number of memmove instructions deleted");
73STATISTIC(NumMemSetInfer, "Number of memsets inferred");
74STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
75STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
76STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
77STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
78
79namespace {
80
81/// Represents a range of memset'd bytes with the ByteVal value.
82/// This allows us to analyze stores like:
83/// store 0 -> P+1
84/// store 0 -> P+0
85/// store 0 -> P+3
86/// store 0 -> P+2
87/// which sometimes happens with stores to arrays of structs etc. When we see
88/// the first store, we make a range [1, 2). The second store extends the range
89/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
90/// two ranges into [0, 3) which is memset'able.
91struct MemsetRange {
92 // Start/End - A semi range that describes the span that this range covers.
93 // The range is closed at the start and open at the end: [Start, End).
94 int64_t Start, End;
95
96 /// StartPtr - The getelementptr instruction that points to the start of the
97 /// range.
98 Value *StartPtr;
99
100 /// Alignment - The known alignment of the first store.
101 MaybeAlign Alignment;
102
103 /// TheStores - The actual stores that make up this range.
105
106 bool isProfitableToUseMemset(const DataLayout &DL) const;
107};
108
109} // end anonymous namespace
110
111static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
112 MemIntrinsic *MemSrc, BatchAAResults &BAA);
113
114bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
115 // If we found more than 4 stores to merge or 16 bytes, use memset.
116 if (TheStores.size() >= 4 || End - Start >= 16)
117 return true;
118
119 // If there is nothing to merge, don't do anything.
120 if (TheStores.size() < 2)
121 return false;
122
123 // If any of the stores are a memset, then it is always good to extend the
124 // memset.
125 for (Instruction *SI : TheStores)
126 if (!isa<StoreInst>(SI))
127 return true;
128
129 // Assume that the code generator is capable of merging pairs of stores
130 // together if it wants to.
131 if (TheStores.size() == 2)
132 return false;
133
134 // If we have fewer than 8 stores, it can still be worthwhile to do this.
135 // For example, merging 4 i8 stores into an i32 store is useful almost always.
136 // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
137 // memset will be split into 2 32-bit stores anyway) and doing so can
138 // pessimize the llvm optimizer.
139 //
140 // Since we don't have perfect knowledge here, make some assumptions: assume
141 // the maximum GPR width is the same size as the largest legal integer
142 // size. If so, check to see whether we will end up actually reducing the
143 // number of stores used.
144 unsigned Bytes = unsigned(End - Start);
145 unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
146 if (MaxIntSize == 0)
147 MaxIntSize = 1;
148 unsigned NumPointerStores = Bytes / MaxIntSize;
149
150 // Assume the remaining bytes if any are done a byte at a time.
151 unsigned NumByteStores = Bytes % MaxIntSize;
152
153 // If we will reduce the # stores (according to this heuristic), do the
154 // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
155 // etc.
156 return TheStores.size() > NumPointerStores + NumByteStores;
157}
158
159namespace {
160
161class MemsetRanges {
162 using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
163
164 /// A sorted list of the memset ranges.
166
167 const DataLayout &DL;
168
169public:
170 MemsetRanges(const DataLayout &DL) : DL(DL) {}
171
173
174 const_iterator begin() const { return Ranges.begin(); }
175 const_iterator end() const { return Ranges.end(); }
176 bool empty() const { return Ranges.empty(); }
177
178 void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
179 if (auto *SI = dyn_cast<StoreInst>(Inst))
180 addStore(OffsetFromFirst, SI);
181 else
182 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
183 }
184
185 void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
186 TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
187 assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
188 addRange(OffsetFromFirst, StoreSize.getFixedValue(),
189 SI->getPointerOperand(), SI->getAlign(), SI);
190 }
191
192 void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
193 int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
194 addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
195 }
196
197 void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
198 Instruction *Inst);
199};
200
201} // end anonymous namespace
202
203/// Add a new store to the MemsetRanges data structure. This adds a
204/// new range for the specified store at the specified offset, merging into
205/// existing ranges as appropriate.
206void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
207 MaybeAlign Alignment, Instruction *Inst) {
208 int64_t End = Start + Size;
209
210 range_iterator I = partition_point(
211 Ranges, [=](const MemsetRange &O) { return O.End < Start; });
212
213 // We now know that I == E, in which case we didn't find anything to merge
214 // with, or that Start <= I->End. If End < I->Start or I == E, then we need
215 // to insert a new range. Handle this now.
216 if (I == Ranges.end() || End < I->Start) {
217 MemsetRange &R = *Ranges.insert(I, MemsetRange());
218 R.Start = Start;
219 R.End = End;
220 R.StartPtr = Ptr;
221 R.Alignment = Alignment;
222 R.TheStores.push_back(Inst);
223 return;
224 }
225
226 // This store overlaps with I, add it.
227 I->TheStores.push_back(Inst);
228
229 // At this point, we may have an interval that completely contains our store.
230 // If so, just add it to the interval and return.
231 if (I->Start <= Start && I->End >= End)
232 return;
233
234 // Now we know that Start <= I->End and End >= I->Start so the range overlaps
235 // but is not entirely contained within the range.
236
237 // See if the range extends the start of the range. In this case, it couldn't
238 // possibly cause it to join the prior range, because otherwise we would have
239 // stopped on *it*.
240 if (Start < I->Start) {
241 I->Start = Start;
242 I->StartPtr = Ptr;
243 I->Alignment = Alignment;
244 }
245
246 // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
247 // is in or right at the end of I), and that End >= I->Start. Extend I out to
248 // End.
249 if (End > I->End) {
250 I->End = End;
251 range_iterator NextI = I;
252 while (++NextI != Ranges.end() && End >= NextI->Start) {
253 // Merge the range in.
254 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
255 if (NextI->End > I->End)
256 I->End = NextI->End;
257 Ranges.erase(NextI);
258 NextI = I;
259 }
260 }
261}
262
263//===----------------------------------------------------------------------===//
264// MemCpyOptLegacyPass Pass
265//===----------------------------------------------------------------------===//
266
267// Check that V is either not accessible by the caller, or unwinding cannot
268// occur between Start and End.
270 Instruction *End) {
271 assert(Start->getParent() == End->getParent() && "Must be in same block");
272 // Function can't unwind, so it also can't be visible through unwinding.
273 if (Start->getFunction()->doesNotThrow())
274 return false;
275
276 // Object is not visible on unwind.
277 // TODO: Support RequiresNoCaptureBeforeUnwind case.
278 bool RequiresNoCaptureBeforeUnwind;
280 RequiresNoCaptureBeforeUnwind) &&
281 !RequiresNoCaptureBeforeUnwind)
282 return false;
283
284 // Check whether there are any unwinding instructions in the range.
285 return any_of(make_range(Start->getIterator(), End->getIterator()),
286 [](const Instruction &I) { return I.mayThrow(); });
287}
288
289void MemCpyOptPass::eraseInstruction(Instruction *I) {
290 MSSAU->removeMemoryAccess(I);
291 EEA->removeInstruction(I);
292 I->eraseFromParent();
293}
294
295// Check for mod or ref of Loc between Start and End, excluding both boundaries.
296// Start and End must be in the same block.
297// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
298// intrinsic and store it inside SkippedLifetimeStart.
300 const MemoryUseOrDef *Start,
301 const MemoryUseOrDef *End,
302 Instruction **SkippedLifetimeStart = nullptr) {
303 assert(Start->getBlock() == End->getBlock() && "Only local supported");
304 for (const MemoryAccess &MA :
305 make_range(++Start->getIterator(), End->getIterator())) {
306 Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
307 if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
309 if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
310 SkippedLifetimeStart && !*SkippedLifetimeStart) {
311 *SkippedLifetimeStart = I;
312 continue;
313 }
314
315 return true;
316 }
317 }
318 return false;
319}
320
321// Check for mod of Loc between Start and End, excluding both boundaries.
322// Start and End can be in different blocks.
324 MemoryLocation Loc, const MemoryUseOrDef *Start,
325 const MemoryUseOrDef *End) {
326 if (isa<MemoryUse>(End)) {
327 // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
328 // Manually check read accesses between Start and End, if they are in the
329 // same block, for clobbers. Otherwise assume Loc is clobbered.
330 return Start->getBlock() != End->getBlock() ||
331 any_of(
332 make_range(std::next(Start->getIterator()), End->getIterator()),
333 [&AA, Loc](const MemoryAccess &Acc) {
334 if (isa<MemoryUse>(&Acc))
335 return false;
336 Instruction *AccInst =
337 cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
338 return isModSet(AA.getModRefInfo(AccInst, Loc));
339 });
340 }
341
342 // TODO: Only walk until we hit Start.
344 End->getDefiningAccess(), Loc, AA);
345 return !MSSA->dominates(Clobber, Start);
346}
347
348/// When scanning forward over instructions, we look for some other patterns to
349/// fold away. In particular, this looks for stores to neighboring locations of
350/// memory. If it sees enough consecutive ones, it attempts to merge them
351/// together into a memcpy/memset.
352Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
353 Value *StartPtr,
354 Value *ByteVal) {
355 const DataLayout &DL = StartInst->getDataLayout();
356
357 // We can't track scalable types
358 if (auto *SI = dyn_cast<StoreInst>(StartInst))
359 if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
360 return nullptr;
361
362 // Okay, so we now have a single store that can be splatable. Scan to find
363 // all subsequent stores of the same value to offset from the same pointer.
364 // Join these together into ranges, so we can decide whether contiguous blocks
365 // are stored.
366 MemsetRanges Ranges(DL);
367
368 BasicBlock::iterator BI(StartInst);
369
370 // Keeps track of the last memory use or def before the insertion point for
371 // the new memset. The new MemoryDef for the inserted memsets will be inserted
372 // after MemInsertPoint.
373 MemoryUseOrDef *MemInsertPoint = nullptr;
374 for (++BI; !BI->isTerminator(); ++BI) {
375 auto *CurrentAcc =
376 cast_or_null<MemoryUseOrDef>(MSSA->getMemoryAccess(&*BI));
377 if (CurrentAcc)
378 MemInsertPoint = CurrentAcc;
379
380 // Calls that only access inaccessible memory do not block merging
381 // accessible stores.
382 if (auto *CB = dyn_cast<CallBase>(BI)) {
383 if (CB->onlyAccessesInaccessibleMemory())
384 continue;
385 }
386
387 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
388 // If the instruction is readnone, ignore it, otherwise bail out. We
389 // don't even allow readonly here because we don't want something like:
390 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
391 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
392 break;
393 continue;
394 }
395
396 if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
397 // If this is a store, see if we can merge it in.
398 if (!NextStore->isSimple())
399 break;
400
401 Value *StoredVal = NextStore->getValueOperand();
402
403 // Don't convert stores of non-integral pointer types to memsets (which
404 // stores integers).
405 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
406 break;
407
408 // We can't track ranges involving scalable types.
409 if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
410 break;
411
412 // Check to see if this stored value is of the same byte-splattable value.
413 Value *StoredByte = isBytewiseValue(StoredVal, DL);
414 // We can blindly merge this store into `StartInst` if it's being filled
415 // with an undef value but we don't because:
416 // 1. `StartInst` can be removed since it's storing an `undef`.
417 // 2. The resulting memset will be much larger than it needs to be.
418 if (ByteVal != StoredByte)
419 break;
420
421 // Check to see if this store is to a constant offset from the start ptr.
422 std::optional<int64_t> Offset =
423 NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
424 if (!Offset)
425 break;
426
427 Ranges.addStore(*Offset, NextStore);
428 } else {
429 auto *MSI = cast<MemSetInst>(BI);
430
431 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
432 !isa<ConstantInt>(MSI->getLength()))
433 break;
434
435 // Check to see if this store is to a constant offset from the start ptr.
436 std::optional<int64_t> Offset =
437 MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
438 if (!Offset)
439 break;
440
441 Ranges.addMemSet(*Offset, MSI);
442 }
443 }
444
445 // If we have no ranges, then we just had a single store with nothing that
446 // could be merged in. This is a very common case of course.
447 if (Ranges.empty())
448 return nullptr;
449
450 // If we had at least one store that could be merged in, add the starting
451 // store as well. We try to avoid this unless there is at least something
452 // interesting as a small compile-time optimization.
453 Ranges.addInst(0, StartInst);
454
455 // If we create any memsets, we put it right before the first instruction that
456 // isn't part of the memset block. This ensure that the memset is dominated
457 // by any addressing instruction needed by the start of the block.
458 IRBuilder<> Builder(&*BI);
459
460 // Now that we have full information about ranges, loop over the ranges and
461 // emit memset's for anything big enough to be worthwhile.
462 Instruction *AMemSet = nullptr;
463 for (const MemsetRange &Range : Ranges) {
464 if (Range.TheStores.size() == 1)
465 continue;
466
467 // If it is profitable to lower this range to memset, do so now.
468 if (!Range.isProfitableToUseMemset(DL))
469 continue;
470
471 // Otherwise, we do want to transform this! Create a new memset.
472 // Get the starting pointer of the block.
473 StartPtr = Range.StartPtr;
474
475 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
476 Range.Alignment);
477 AMemSet->mergeDIAssignID(Range.TheStores);
478
479 LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
480 : Range.TheStores) dbgs()
481 << *SI << '\n';
482 dbgs() << "With: " << *AMemSet << '\n');
483 if (!Range.TheStores.empty())
484 AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
485
486 auto *NewDef = cast<MemoryDef>(
487 MemInsertPoint->getMemoryInst() == &*BI
488 ? MSSAU->createMemoryAccessBefore(AMemSet, nullptr, MemInsertPoint)
489 : MSSAU->createMemoryAccessAfter(AMemSet, nullptr, MemInsertPoint));
490 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
491 MemInsertPoint = NewDef;
492
493 // Zap all the stores.
494 for (Instruction *SI : Range.TheStores)
496
497 ++NumMemSetInfer;
498 }
499
500 return AMemSet;
501}
502
503// This method try to lift a store instruction before position P.
504// It will lift the store and its argument + that anything that
505// may alias with these.
506// The method returns true if it was successful.
507bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
508 // If the store alias this position, early bail out.
509 MemoryLocation StoreLoc = MemoryLocation::get(SI);
510 if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
511 return false;
512
513 // Keep track of the arguments of all instruction we plan to lift
514 // so we can make sure to lift them as well if appropriate.
515 DenseSet<Instruction *> Args;
516 auto AddArg = [&](Value *Arg) {
517 auto *I = dyn_cast<Instruction>(Arg);
518 if (I && I->getParent() == SI->getParent()) {
519 // Cannot hoist user of P above P
520 if (I == P)
521 return false;
522 Args.insert(I);
523 }
524 return true;
525 };
526 if (!AddArg(SI->getPointerOperand()))
527 return false;
528
529 // Instruction to lift before P.
530 SmallVector<Instruction *, 8> ToLift{SI};
531
532 // Memory locations of lifted instructions.
533 SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
534
535 // Lifted calls.
537
538 const MemoryLocation LoadLoc = MemoryLocation::get(LI);
539
540 for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
541 auto *C = &*I;
542
543 // Make sure hoisting does not perform a store that was not guaranteed to
544 // happen.
546 return false;
547
548 bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));
549
550 bool NeedLift = false;
551 if (Args.erase(C))
552 NeedLift = true;
553 else if (MayAlias) {
554 NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
555 return isModOrRefSet(AA->getModRefInfo(C, ML));
556 });
557
558 if (!NeedLift)
559 NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
560 return isModOrRefSet(AA->getModRefInfo(C, Call));
561 });
562 }
563
564 if (!NeedLift)
565 continue;
566
567 if (MayAlias) {
568 // Since LI is implicitly moved downwards past the lifted instructions,
569 // none of them may modify its source.
570 if (isModSet(AA->getModRefInfo(C, LoadLoc)))
571 return false;
572 else if (const auto *Call = dyn_cast<CallBase>(C)) {
573 // If we can't lift this before P, it's game over.
574 if (isModOrRefSet(AA->getModRefInfo(P, Call)))
575 return false;
576
577 Calls.push_back(Call);
578 } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
579 // If we can't lift this before P, it's game over.
580 auto ML = MemoryLocation::get(C);
581 if (isModOrRefSet(AA->getModRefInfo(P, ML)))
582 return false;
583
584 MemLocs.push_back(ML);
585 } else
586 // We don't know how to lift this instruction.
587 return false;
588 }
589
590 ToLift.push_back(C);
591 for (Value *Op : C->operands())
592 if (!AddArg(Op))
593 return false;
594 }
595
596 // Find MSSA insertion point. Normally P will always have a corresponding
597 // memory access before which we can insert. However, with non-standard AA
598 // pipelines, there may be a mismatch between AA and MSSA, in which case we
599 // will scan for a memory access before P. In either case, we know for sure
600 // that at least the load will have a memory access.
601 // TODO: Simplify this once P will be determined by MSSA, in which case the
602 // discrepancy can no longer occur.
603 MemoryUseOrDef *MemInsertPoint = nullptr;
604 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(P)) {
605 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
606 } else {
607 const Instruction *ConstP = P;
608 for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
609 ++LI->getReverseIterator())) {
610 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(&I)) {
611 MemInsertPoint = MA;
612 break;
613 }
614 }
615 }
616
617 // We made it, we need to lift.
618 for (auto *I : llvm::reverse(ToLift)) {
619 LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
620 I->moveBefore(P->getIterator());
621 assert(MemInsertPoint && "Must have found insert point");
622 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(I)) {
623 MSSAU->moveAfter(MA, MemInsertPoint);
624 MemInsertPoint = MA;
625 }
626 }
627
628 return true;
629}
630
631bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
632 const DataLayout &DL,
634 if (!LI->isSimple() || !LI->hasOneUse() || LI->getParent() != SI->getParent())
635 return false;
636
637 BatchAAResults BAA(*AA, EEA);
638 auto *T = LI->getType();
639 // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
640 // the corresponding libcalls are not available.
641 // TODO: We should really distinguish between libcall availability and
642 // our ability to introduce intrinsics.
643 if (T->isAggregateType() &&
645 (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
646 MemoryLocation LoadLoc = MemoryLocation::get(LI);
647
648 // We use alias analysis to check if an instruction may store to
649 // the memory we load from in between the load and the store. If
650 // such an instruction is found, we try to promote there instead
651 // of at the store position.
652 // TODO: Can use MSSA for this.
653 Instruction *P = SI;
654 for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
655 if (isModSet(BAA.getModRefInfo(&I, LoadLoc))) {
656 P = &I;
657 break;
658 }
659 }
660
661 // If we found an instruction that may write to the loaded memory,
662 // we can try to promote at this position instead of the store
663 // position if nothing aliases the store memory after this and the store
664 // destination is not in the range.
665 if (P == SI || moveUp(SI, P, LI)) {
666 // If we load from memory that may alias the memory we store to,
667 // memmove must be used to preserve semantic. If not, memcpy can
668 // be used. Also, if we load from constant memory, memcpy can be used
669 // as the constant memory won't be modified.
670 bool UseMemMove = false;
671 if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
672 UseMemMove = true;
673
674 IRBuilder<> Builder(P);
675 Value *Size =
676 Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
677 Instruction *M;
678 if (UseMemMove)
679 M = Builder.CreateMemMove(SI->getPointerOperand(), SI->getAlign(),
680 LI->getPointerOperand(), LI->getAlign(),
681 Size);
682 else
683 M = Builder.CreateMemCpy(SI->getPointerOperand(), SI->getAlign(),
684 LI->getPointerOperand(), LI->getAlign(), Size);
685 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
686
687 LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M
688 << "\n");
689
690 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
691 auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
692 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
693
696 ++NumMemCpyInstr;
697
698 // Make sure we do not invalidate the iterator.
699 BBI = M->getIterator();
700 return true;
701 }
702 }
703
704 // Detect cases where we're performing call slot forwarding, but
705 // happen to be using a load-store pair to implement it, rather than
706 // a memcpy.
707 auto GetCall = [&]() -> CallInst * {
708 // We defer this expensive clobber walk until the cheap checks
709 // have been done on the source inside performCallSlotOptzn.
710 if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
711 MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
712 return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
713 return nullptr;
714 };
715
716 bool Changed = performCallSlotOptzn(
717 LI, SI, SI->getPointerOperand()->stripPointerCasts(),
719 DL.getTypeStoreSize(SI->getOperand(0)->getType()),
720 std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
721 if (Changed) {
724 ++NumMemCpyInstr;
725 return true;
726 }
727
728 // If this is a load-store pair from a stack slot to a stack slot, we
729 // might be able to perform the stack-move optimization just as we do for
730 // memcpys from an alloca to an alloca.
731 if (performStackMoveOptzn(LI, SI, SI->getPointerOperand(),
732 LI->getPointerOperand(), DL.getTypeStoreSize(T),
733 BAA)) {
734 // Avoid invalidating the iterator.
735 BBI = SI->getNextNode()->getIterator();
738 ++NumMemCpyInstr;
739 return true;
740 }
741
742 return false;
743}
744
745bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
746 if (!SI->isSimple())
747 return false;
748
749 // Avoid merging nontemporal stores since the resulting
750 // memcpy/memset would not be able to preserve the nontemporal hint.
751 // In theory we could teach how to propagate the !nontemporal metadata to
752 // memset calls. However, that change would force the backend to
753 // conservatively expand !nontemporal memset calls back to sequences of
754 // store instructions (effectively undoing the merging).
755 if (SI->getMetadata(LLVMContext::MD_nontemporal))
756 return false;
757
758 const DataLayout &DL = SI->getDataLayout();
759
760 Value *StoredVal = SI->getValueOperand();
761
762 // Not all the transforms below are correct for non-integral pointers, bail
763 // until we've audited the individual pieces.
764 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
765 return false;
766
767 // Load to store forwarding can be interpreted as memcpy.
768 if (auto *LI = dyn_cast<LoadInst>(StoredVal))
769 return processStoreOfLoad(SI, LI, DL, BBI);
770
771 // The following code creates memset intrinsics out of thin air. Don't do
772 // this if the corresponding libfunc is not available.
773 // TODO: We should really distinguish between libcall availability and
774 // our ability to introduce intrinsics.
775 if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
776 return false;
777
778 // There are two cases that are interesting for this code to handle: memcpy
779 // and memset. Right now we only handle memset.
780
781 // Ensure that the value being stored is something that can be memset'able a
782 // byte at a time like "0" or "-1" or any width, as well as things like
783 // 0xA0A0A0A0 and 0.0.
784 Value *V = SI->getOperand(0);
785 Value *ByteVal = isBytewiseValue(V, DL);
786 if (!ByteVal)
787 return false;
788
789 if (Instruction *I =
790 tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
791 BBI = I->getIterator(); // Don't invalidate iterator.
792 return true;
793 }
794
795 // If we have an aggregate, we try to promote it to memset regardless
796 // of opportunity for merging as it can expose optimization opportunities
797 // in subsequent passes.
798 auto *T = V->getType();
799 if (!T->isAggregateType())
800 return false;
801
802 TypeSize Size = DL.getTypeStoreSize(T);
803 if (Size.isScalable())
804 return false;
805
806 IRBuilder<> Builder(SI);
807 auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
808 SI->getAlign());
809 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
810
811 LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
812
813 // The newly inserted memset is immediately overwritten by the original
814 // store, so we do not need to rename uses.
815 auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
816 auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
817 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
818
820 NumMemSetInfer++;
821
822 // Make sure we do not invalidate the iterator.
823 BBI = M->getIterator();
824 return true;
825}
826
827bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
828 // See if there is another memset or store neighboring this memset which
829 // allows us to widen out the memset to do a single larger store.
830 if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
831 if (Instruction *I =
832 tryMergingIntoMemset(MSI, MSI->getDest(), MSI->getValue())) {
833 BBI = I->getIterator(); // Don't invalidate iterator.
834 return true;
835 }
836 return false;
837}
838
839/// Takes a memcpy and a call that it depends on,
840/// and checks for the possibility of a call slot optimization by having
841/// the call write its result directly into the destination of the memcpy.
842bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
843 Instruction *cpyStore, Value *cpyDest,
844 Value *cpySrc, TypeSize cpySize,
845 Align cpyDestAlign,
846 BatchAAResults &BAA,
847 std::function<CallInst *()> GetC) {
848 // The general transformation to keep in mind is
849 //
850 // call @func(..., src, ...)
851 // memcpy(dest, src, ...)
852 //
853 // ->
854 //
855 // memcpy(dest, src, ...)
856 // call @func(..., dest, ...)
857 //
858 // Since moving the memcpy is technically awkward, we additionally check that
859 // src only holds uninitialized values at the moment of the call, meaning that
860 // the memcpy can be discarded rather than moved.
861
862 // We can't optimize scalable types.
863 if (cpySize.isScalable())
864 return false;
865
866 // Require that src be an alloca. This simplifies the reasoning considerably.
867 auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
868 if (!srcAlloca)
869 return false;
870
871 const DataLayout &DL = cpyLoad->getDataLayout();
872 // We can't optimize scalable types or variable-length allocas.
873 std::optional<TypeSize> SrcAllocaSize = srcAlloca->getAllocationSize(DL);
874 if (!SrcAllocaSize || SrcAllocaSize->isScalable())
875 return false;
876 uint64_t srcSize = SrcAllocaSize->getFixedValue();
877
878 if (cpySize < srcSize)
879 return false;
880
881 CallInst *C = GetC();
882 if (!C)
883 return false;
884
885 // Lifetime marks shouldn't be operated on.
886 if (Function *F = C->getCalledFunction())
887 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
888 return false;
889
890 if (C->getParent() != cpyStore->getParent()) {
891 LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
892 return false;
893 }
894
895 MemoryLocation DestLoc =
896 isa<StoreInst>(cpyStore)
897 ? MemoryLocation::get(cpyStore)
898 : MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
899
900 // Check that nothing touches the dest of the copy between
901 // the call and the store/memcpy.
902 Instruction *SkippedLifetimeStart = nullptr;
903 if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
904 MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
905 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
906 return false;
907 }
908
909 // If we need to move a lifetime.start above the call, make sure that we can
910 // actually do so. If the argument is bitcasted for example, we would have to
911 // move the bitcast as well, which we don't handle.
912 if (SkippedLifetimeStart) {
913 auto *LifetimeArg =
914 dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(0));
915 if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
916 C->comesBefore(LifetimeArg))
917 return false;
918 }
919
920 // Check that storing to the first srcSize bytes of dest will not cause a
921 // trap or data race.
922 bool ExplicitlyDereferenceableOnly;
924 ExplicitlyDereferenceableOnly) ||
925 !isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
926 DL, C, AC, DT)) {
927 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
928 return false;
929 }
930
931 // Make sure that nothing can observe cpyDest being written early. There are
932 // a number of cases to consider:
933 // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
934 // the transform.
935 // 2. C itself may not access cpyDest (prior to the transform). This is
936 // checked further below.
937 // 3. If cpyDest is accessible to the caller of this function (potentially
938 // captured and not based on an alloca), we need to ensure that we cannot
939 // unwind between C and cpyStore. This is checked here.
940 // 4. If cpyDest is potentially captured, there may be accesses to it from
941 // another thread. In this case, we need to check that cpyStore is
942 // guaranteed to be executed if C is. As it is a non-atomic access, it
943 // renders accesses from other threads undefined.
944 // TODO: This is currently not checked.
945 if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
946 LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
947 return false;
948 }
949
950 // Check that dest points to memory that is at least as aligned as src.
951 Align srcAlign = srcAlloca->getAlign();
952 bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
953 // If dest is not aligned enough and we can't increase its alignment then
954 // bail out.
955 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
956 LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
957 return false;
958 }
959
960 // Check that src is not accessed except via the call and the memcpy. This
961 // guarantees that it holds only undefined values when passed in (so the final
962 // memcpy can be dropped), that it is not read or written between the call and
963 // the memcpy, and that writing beyond the end of it is undefined.
964 SmallVector<User *, 8> srcUseList(srcAlloca->users());
965 while (!srcUseList.empty()) {
966 User *U = srcUseList.pop_back_val();
967
968 if (isa<AddrSpaceCastInst>(U)) {
969 append_range(srcUseList, U->users());
970 continue;
971 }
973 continue;
974
975 if (U != C && U != cpyLoad) {
976 LLVM_DEBUG(dbgs() << "Call slot: Source accessed by " << *U << "\n");
977 return false;
978 }
979 }
980
981 // Check whether src is captured by the called function, in which case there
982 // may be further indirect uses of src.
983 bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
984 return U->stripPointerCasts() == cpySrc &&
985 !C->doesNotCapture(C->getArgOperandNo(&U));
986 });
987
988 // If src is captured, then check whether there are any potential uses of
989 // src through the captured pointer before the lifetime of src ends, either
990 // due to a lifetime.end or a return from the function.
991 if (SrcIsCaptured) {
992 // Check that dest is not captured before/at the call. We have already
993 // checked that src is not captured before it. If either had been captured,
994 // then the call might be comparing the argument against the captured dest
995 // or src pointer.
996 Value *DestObj = getUnderlyingObject(cpyDest);
997 if (!isIdentifiedFunctionLocal(DestObj) ||
998 PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true, C, DT,
999 /* IncludeI */ true))
1000 return false;
1001
1002 MemoryLocation SrcLoc =
1003 MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
1004 for (Instruction &I :
1005 make_range(++C->getIterator(), C->getParent()->end())) {
1006 // Lifetime of srcAlloca ends at lifetime.end.
1007 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1008 if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
1009 II->getArgOperand(0) == srcAlloca)
1010 break;
1011 }
1012
1013 // Lifetime of srcAlloca ends at return.
1014 if (isa<ReturnInst>(&I))
1015 break;
1016
1017 // Ignore the direct read of src in the load.
1018 if (&I == cpyLoad)
1019 continue;
1020
1021 // Check whether this instruction may mod/ref src through the captured
1022 // pointer (we have already any direct mod/refs in the loop above).
1023 // Also bail if we hit a terminator, as we don't want to scan into other
1024 // blocks.
1025 if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
1026 return false;
1027 }
1028 }
1029
1030 // Since we're changing the parameter to the callsite, we need to make sure
1031 // that what would be the new parameter dominates the callsite.
1032 bool NeedMoveGEP = false;
1033 if (!DT->dominates(cpyDest, C)) {
1034 // Support moving a constant index GEP before the call.
1035 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1036 if (GEP && GEP->hasAllConstantIndices() &&
1037 DT->dominates(GEP->getPointerOperand(), C))
1038 NeedMoveGEP = true;
1039 else
1040 return false;
1041 }
1042
1043 // In addition to knowing that the call does not access src in some
1044 // unexpected manner, for example via a global, which we deduce from
1045 // the use analysis, we also need to know that it does not sneakily
1046 // access dest. We rely on AA to figure this out for us.
1047 MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
1048 ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
1049 // If necessary, perform additional analysis.
1050 if (isModOrRefSet(MR))
1051 MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
1052 if (isModOrRefSet(MR))
1053 return false;
1054
1055 // We can't create address space casts here because we don't know if they're
1056 // safe for the target.
1057 if (cpySrc->getType() != cpyDest->getType())
1058 return false;
1059 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1060 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1061 cpySrc->getType() != C->getArgOperand(ArgI)->getType())
1062 return false;
1063
1064 // All the checks have passed, so do the transformation.
1065 bool changedArgument = false;
1066 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1067 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1068 changedArgument = true;
1069 C->setArgOperand(ArgI, cpyDest);
1070 }
1071
1072 if (!changedArgument)
1073 return false;
1074
1075 // If the destination wasn't sufficiently aligned then increase its alignment.
1076 if (!isDestSufficientlyAligned) {
1077 assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1078 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1079 }
1080
1081 if (NeedMoveGEP) {
1082 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1083 GEP->moveBefore(C->getIterator());
1084 }
1085
1086 if (SkippedLifetimeStart) {
1087 SkippedLifetimeStart->moveBefore(C->getIterator());
1088 MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
1089 MSSA->getMemoryAccess(C));
1090 }
1091
1092 combineAAMetadata(C, cpyLoad);
1093 if (cpyLoad != cpyStore)
1094 combineAAMetadata(C, cpyStore);
1095
1096 ++NumCallSlot;
1097 return true;
1098}
1099
1100/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1101/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
1102bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1103 MemCpyInst *MDep,
1104 BatchAAResults &BAA) {
1105 // We can only optimize non-volatile memcpy's.
1106 if (MDep->isVolatile())
1107 return false;
1108
1109 // If dep instruction is reading from our current input, then it is a noop
1110 // transfer and substituting the input won't change this instruction. Just
1111 // ignore the input and let someone else zap MDep. This handles cases like:
1112 // memcpy(a <- a)
1113 // memcpy(b <- a)
1114 // This also avoids infinite loops.
1115 if (BAA.isMustAlias(MDep->getDest(), MDep->getSource()))
1116 return false;
1117
1118 int64_t MForwardOffset = 0;
1119 const DataLayout &DL = M->getModule()->getDataLayout();
1120 // We can only transforms memcpy's where the dest of one is the source of the
1121 // other, or they have an offset in a range.
1122 if (M->getSource() != MDep->getDest()) {
1123 std::optional<int64_t> Offset =
1124 M->getSource()->getPointerOffsetFrom(MDep->getDest(), DL);
1125 if (!Offset || *Offset < 0)
1126 return false;
1127 MForwardOffset = *Offset;
1128 }
1129
1130 Value *CopyLength = M->getLength();
1131
1132 // The length of the memcpy's must be the same, or the preceding one must be
1133 // larger than the following one, or the contents of the overread must be
1134 // undefined bytes of a defined size.
1135 if (MForwardOffset != 0 || MDep->getLength() != CopyLength) {
1136 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1137 auto *MLen = dyn_cast<ConstantInt>(CopyLength);
1138 // This could be converted to a runtime test (%CopyLength =
1139 // min(max(0, MDepLen - MForwardOffset), MLen)), but it is
1140 // unclear if that is useful
1141 if (!MDepLen || !MLen)
1142 return false;
1143 if (MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset) {
1144 if (!overreadUndefContents(MSSA, M, MDep, BAA))
1145 return false;
1146 if (MDepLen->getZExtValue() <= (uint64_t)MForwardOffset)
1147 return false; // Should not reach here (there is obviously no aliasing
1148 // with MDep), so just bail in case it had incomplete info
1149 // somehow
1150 CopyLength = ConstantInt::get(CopyLength->getType(),
1151 MDepLen->getZExtValue() - MForwardOffset);
1152 }
1153 }
1154
1155 IRBuilder<> Builder(M);
1156 auto *CopySource = MDep->getSource();
1157 Instruction *NewCopySource = nullptr;
1158 llvm::scope_exit CleanupOnRet([&] {
1159 if (NewCopySource && NewCopySource->use_empty())
1160 // Safety: It's safe here because we will only allocate more instructions
1161 // after finishing all BatchAA queries, but we have to be careful if we
1162 // want to do something like this in another place. Then we'd probably
1163 // have to delay instruction removal until all transforms on an
1164 // instruction finished.
1165 eraseInstruction(NewCopySource);
1166 });
1167 MaybeAlign CopySourceAlign = MDep->getSourceAlign();
1168 auto MCopyLoc = MemoryLocation::getForSource(MDep);
1169 // Truncate the size of the MDep access to just the bytes read
1170 if (MDep->getLength() != CopyLength) {
1171 auto *ConstLength = cast<ConstantInt>(CopyLength);
1172 MCopyLoc = MCopyLoc.getWithNewSize(
1173 LocationSize::precise(ConstLength->getZExtValue()));
1174 }
1175
1176 // When the forwarding offset is greater than 0, we transform
1177 // memcpy(d1 <- s1)
1178 // memcpy(d2 <- d1+o)
1179 // to
1180 // memcpy(d2 <- s1+o)
1181 if (MForwardOffset > 0) {
1182 // The copy destination of `M` maybe can serve as the source of copying.
1183 std::optional<int64_t> MDestOffset =
1184 M->getRawDest()->getPointerOffsetFrom(MDep->getRawSource(), DL);
1185 if (MDestOffset == MForwardOffset)
1186 CopySource = M->getDest();
1187 else {
1188 CopySource = Builder.CreateInBoundsPtrAdd(
1189 CopySource, Builder.getInt64(MForwardOffset));
1190 NewCopySource = dyn_cast<Instruction>(CopySource);
1191 }
1192 // We need to update `MCopyLoc` if an offset exists.
1193 MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
1194 if (CopySourceAlign)
1195 CopySourceAlign = commonAlignment(*CopySourceAlign, MForwardOffset);
1196 }
1197
1198 // Verify that the copied-from memory doesn't change in between the two
1199 // transfers. For example, in:
1200 // memcpy(a <- b)
1201 // *b = 42;
1202 // memcpy(c <- a)
1203 // It would be invalid to transform the second memcpy into memcpy(c <- b).
1204 //
1205 // TODO: If the code between M and MDep is transparent to the destination "c",
1206 // then we could still perform the xform by moving M up to the first memcpy.
1207 if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
1208 MSSA->getMemoryAccess(M)))
1209 return false;
1210
1211 // No need to create `memcpy(a <- a)`.
1212 if (BAA.isMustAlias(M->getDest(), CopySource)) {
1213 // Remove the instruction we're replacing.
1215 ++NumMemCpyInstr;
1216 return true;
1217 }
1218
1219 // If the dest of the second might alias the source of the first, then the
1220 // source and dest might overlap. In addition, if the source of the first
1221 // points to constant memory, they won't overlap by definition. Otherwise, we
1222 // still want to eliminate the intermediate value, but we have to generate a
1223 // memmove instead of memcpy.
1224 bool UseMemMove = false;
1226 // Don't convert llvm.memcpy.inline into memmove because memmove can be
1227 // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
1228 // there is no inline version of llvm.memmove)
1229 if (M->isForceInlined())
1230 return false;
1231 UseMemMove = true;
1232 }
1233
1234 // If all checks passed, then we can transform M.
1235 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1236 << *MDep << '\n'
1237 << *M << '\n');
1238
1239 // TODO: Is this worth it if we're creating a less aligned memcpy? For
1240 // example we could be moving from movaps -> movq on x86.
1241 Instruction *NewM;
1242 if (UseMemMove)
1243 NewM = Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
1244 CopySourceAlign, CopyLength, M->isVolatile());
1245 else if (M->isForceInlined())
1246 // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1247 // never allowed since that would allow the latter to be lowered as a call
1248 // to an external function.
1249 NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
1250 CopySource, CopySourceAlign, CopyLength,
1251 M->isVolatile());
1252 else
1253 NewM = Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
1254 CopySourceAlign, CopyLength, M->isVolatile());
1255
1256 NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
1257
1258 assert(isa<MemoryDef>(MSSA->getMemoryAccess(M)));
1259 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(M));
1260 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1261 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1262
1263 // Remove the instruction we're replacing.
1265 ++NumMemCpyInstr;
1266 return true;
1267}
1268
1269/// We've found that the (upward scanning) memory dependence of \p MemCpy is
1270/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1271/// weren't copied over by \p MemCpy.
1272///
1273/// In other words, transform:
1274/// \code
1275/// memset(dst, c, dst_size);
1276/// ...
1277/// memcpy(dst, src, src_size);
1278/// \endcode
1279/// into:
1280/// \code
1281/// ...
1282/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1283/// memcpy(dst, src, src_size);
1284/// \endcode
1285///
1286/// The memset is sunk to just before the memcpy to ensure that src_size is
1287/// present when emitting the simplified memset.
1288bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1289 MemSetInst *MemSet,
1290 BatchAAResults &BAA) {
1291 // We can only transform memset/memcpy with the same destination.
1292 if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1293 return false;
1294
1295 if (MemSet->isVolatile())
1296 return false;
1297
1298 // Don't perform the transform if src_size may be zero. In that case, the
1299 // transform is essentially a complex no-op and may lead to an infinite
1300 // loop if BasicAA is smart enough to understand that dst and dst + src_size
1301 // are still MustAlias after the transform.
1302 Value *SrcSize = MemCpy->getLength();
1303 if (!isKnownNonZero(SrcSize,
1304 SimplifyQuery(MemCpy->getDataLayout(), DT, AC, MemCpy)))
1305 return false;
1306
1307 // Check that src and dst of the memcpy aren't the same. While memcpy
1308 // operands cannot partially overlap, exact equality is allowed.
1309 if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1310 return false;
1311
1312 // We know that dst up to src_size is not written. We now need to make sure
1313 // that dst up to dst_size is not accessed. (If we did not move the memset,
1314 // checking for reads would be sufficient.)
1316 MSSA->getMemoryAccess(MemSet),
1317 MSSA->getMemoryAccess(MemCpy)))
1318 return false;
1319
1320 // Use the same i8* dest as the memcpy, killing the memset dest if different.
1321 Value *Dest = MemCpy->getRawDest();
1322 Value *DestSize = MemSet->getLength();
1323
1324 if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1325 return false;
1326
1327 // If the sizes are the same, simply drop the memset instead of generating
1328 // a replacement with zero size.
1329 if (DestSize == SrcSize) {
1330 eraseInstruction(MemSet);
1331 return true;
1332 }
1333
1334 // By default, create an unaligned memset.
1335 Align Alignment = Align(1);
1336 // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1337 // of the sum.
1338 const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
1339 MemCpy->getDestAlign().valueOrOne());
1340 if (DestAlign > 1)
1341 if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1342 Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
1343
1344 IRBuilder<> Builder(MemCpy);
1345
1346 // Preserve the debug location of the old memset for the code emitted here
1347 // related to the new memset. This is correct according to the rules in
1348 // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an
1349 // instruction location", given that we move the memset within the basic
1350 // block.
1351 assert(MemSet->getParent() == MemCpy->getParent() &&
1352 "Preserving debug location based on moving memset within BB.");
1353 Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());
1354
1355 // If the sizes have different types, zext the smaller one.
1356 if (DestSize->getType() != SrcSize->getType()) {
1357 if (DestSize->getType()->getIntegerBitWidth() >
1358 SrcSize->getType()->getIntegerBitWidth())
1359 SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1360 else
1361 DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1362 }
1363
1364 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1365 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1366 Value *MemsetLen = Builder.CreateSelect(
1367 Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1368 // FIXME (#167968): we could explore estimating the branch_weights based on
1369 // value profiling data about the 2 sizes.
1370 if (auto *SI = dyn_cast<SelectInst>(MemsetLen))
1372 Instruction *NewMemSet =
1373 Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
1374 MemSet->getOperand(1), MemsetLen, Alignment);
1375
1376 assert(isa<MemoryDef>(MSSA->getMemoryAccess(MemCpy)) &&
1377 "MemCpy must be a MemoryDef");
1378 // The new memset is inserted before the memcpy, and it is known that the
1379 // memcpy's defining access is the memset about to be removed.
1380 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
1381 auto *NewAccess =
1382 MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef);
1383 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1384
1385 eraseInstruction(MemSet);
1386 return true;
1387}
1388
1389/// Determine whether the pointer V had only undefined content (due to Def),
1390/// either because it was freshly alloca'd or started its lifetime.
1392 MemoryDef *Def) {
1393 if (MSSA->isLiveOnEntryDef(Def))
1395
1396 if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst()))
1397 if (II->getIntrinsicID() == Intrinsic::lifetime_start)
1398 if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V)))
1399 return II->getArgOperand(0) == Alloca;
1400
1401 return false;
1402}
1403
1404// If the memcpy is larger than the previous, but the memory was undef prior to
1405// that, we can just ignore the tail. Technically we're only interested in the
1406// bytes from 0..MemSrcOffset and MemSrcLength+MemSrcOffset..CopySize here, but
1407// as we can't easily represent this location (hasUndefContents uses mustAlias
1408// which cannot deal with offsets), we use the full 0..CopySize range.
1409static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
1410 MemIntrinsic *MemSrc, BatchAAResults &BAA) {
1411 MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1412 MemoryUseOrDef *MemSrcAccess = MSSA->getMemoryAccess(MemSrc);
1414 MemSrcAccess->getDefiningAccess(), MemCpyLoc, BAA);
1415 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1416 if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD))
1417 return true;
1418 return false;
1419}
1420
1421/// Transform memcpy to memset when its source was just memset.
1422/// In other words, turn:
1423/// \code
1424/// memset(dst1, c, dst1_size);
1425/// memcpy(dst2, dst1, dst2_size);
1426/// \endcode
1427/// into:
1428/// \code
1429/// memset(dst1, c, dst1_size);
1430/// memset(dst2, c, dst2_size);
1431/// \endcode
1432bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1433 MemSetInst *MemSet,
1434 BatchAAResults &BAA) {
1435 Value *MemSetSize = MemSet->getLength();
1436 Value *CopySize = MemCpy->getLength();
1437
1438 int64_t MOffset = 0;
1439 const DataLayout &DL = MemCpy->getModule()->getDataLayout();
1440 // We can only transforms memcpy's where the dest of one is the source of the
1441 // other, or they have a known offset.
1442 if (MemCpy->getSource() != MemSet->getDest()) {
1443 std::optional<int64_t> Offset =
1444 MemCpy->getSource()->getPointerOffsetFrom(MemSet->getDest(), DL);
1445 if (!Offset)
1446 return false;
1447 // On positive offsets, the memcpy source is at a offset into the memset'd
1448 // region. On negative offsets, the copy starts at a offset prior to the
1449 // previously memset'd area, namely, we memcpy from a partially initialized
1450 // region.
1451 MOffset = *Offset;
1452 }
1453
1454 if (MOffset != 0 || MemSetSize != CopySize) {
1455 // Make sure the memcpy doesn't read any more than what the memset wrote,
1456 // other than undef. Likewise, the memcpy should not read from an area not
1457 // covered by the memset unless undef bytes. Don't worry about sizes larger
1458 // than i64.
1459 auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1460 auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1461 if (!CMemSetSize || !CCopySize || MOffset < 0 ||
1462 CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()) {
1463 if (!overreadUndefContents(MSSA, MemCpy, MemSet, BAA))
1464 return false;
1465
1466 if (CMemSetSize && CCopySize) {
1467 uint64_t MemSetSizeVal = CMemSetSize->getZExtValue();
1468 uint64_t MemCpySizeVal = CCopySize->getZExtValue();
1469 uint64_t NewSize;
1470
1471 if (MOffset < 0) {
1472 // Offset from beginning of the initialized region.
1473 uint64_t Offset = -MOffset;
1474 NewSize = MemCpySizeVal <= Offset ? 0 : MemCpySizeVal - Offset;
1475 } else if (MOffset == 0) {
1476 NewSize = MemSetSizeVal;
1477 } else {
1478 NewSize =
1479 MemSetSizeVal <= (uint64_t)MOffset ? 0 : MemSetSizeVal - MOffset;
1480 }
1481 CopySize = ConstantInt::get(CopySize->getType(), NewSize);
1482 } else {
1483 if (MOffset < 0)
1484 return false;
1485 }
1486 }
1487 }
1488
1489 IRBuilder<> Builder(MemCpy);
1490 Value *DestPtr = MemCpy->getRawDest();
1491 MaybeAlign Align = MemCpy->getDestAlign();
1492 if (MOffset < 0) {
1493 DestPtr = Builder.CreatePtrAdd(DestPtr, Builder.getInt64(-MOffset));
1494 if (Align)
1495 Align = commonAlignment(*Align, -MOffset);
1496 }
1497
1498 Instruction *NewM =
1499 Builder.CreateMemSet(DestPtr, MemSet->getOperand(1), CopySize, Align);
1500 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
1501 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1502 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1503
1504 return true;
1505}
1506
1507// Attempts to optimize the pattern whereby memory is copied from an alloca to
1508// another alloca, where the two allocas don't have conflicting mod/ref. If
1509// successful, the two allocas can be merged into one and the transfer can be
1510// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
1511// move operations in that language.
1512//
1513// Once we determine that the optimization is safe to perform, we replace all
1514// uses of the destination alloca with the source alloca. We also "shrink wrap"
1515// the lifetime markers of the single merged alloca to before the first use
1516// and after the last use. Note that the "shrink wrapping" procedure is a safe
1517// transformation only because we restrict the scope of this optimization to
1518// allocas that aren't captured.
1519bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1520 Value *DestPtr, Value *SrcPtr,
1521 TypeSize Size, BatchAAResults &BAA) {
1522 LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
1523 << *Store << "\n");
1524
1525 AllocaInst *DestAlloca = dyn_cast<AllocaInst>(getUnderlyingObject(DestPtr));
1526 if (!DestAlloca)
1527 return false;
1528
1529 AllocaInst *SrcAlloca = dyn_cast<AllocaInst>(getUnderlyingObject(SrcPtr));
1530 if (!SrcAlloca)
1531 return false;
1532
1533 // Explicitly don't handle degenerate case of a partial copy within one
1534 // alloca. It would always fail the dominator check later anyways, and
1535 // possibly the modref checks also.
1536 if (SrcAlloca == DestAlloca)
1537 return false;
1538
1539 // Make sure the two allocas are in the same address space.
1540 if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
1541 LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
1542 return false;
1543 }
1544
1545 if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
1546 return false;
1547
1548 // Check that copy is full with static size.
1549 const DataLayout &DL = DestAlloca->getDataLayout();
1550
1551 auto DestOffset = DestPtr->getPointerOffsetFrom(DestAlloca, DL);
1552 if (!DestOffset)
1553 return false;
1554
1555 auto SrcOffset = SrcPtr->getPointerOffsetFrom(SrcAlloca, DL);
1556 if (!SrcOffset || *SrcOffset < *DestOffset || *SrcOffset < 0)
1557 return false;
1558 // Offset difference must preserve dest alloca's alignment.
1559 if ((*SrcOffset - *DestOffset) % DestAlloca->getAlign().value() != 0)
1560 return false;
1561 std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
1562 std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
1563 if (!SrcSize || !DestSize)
1564 return false;
1565 if (*SrcSize != *DestSize)
1566 if (!SrcSize->isFixed() || !DestSize->isFixed())
1567 return false;
1568 // Check that copy covers entirety of dest alloca.
1569 if (Size != *DestSize || *DestOffset != 0) {
1570 LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
1571 return false;
1572 }
1573
1574 // Check if it will be legal to combine allocas without breaking dominator.
1575 bool MoveSrc = !DT->dominates(SrcAlloca, DestAlloca);
1576 if (MoveSrc) {
1577 if (!DT->dominates(DestAlloca, SrcAlloca))
1578 return false;
1579 }
1580
1581 // Check that src and dest are never captured, unescaped allocas. Also
1582 // find the nearest common dominator and postdominator for all users in
1583 // order to shrink wrap the lifetimes, and instructions with noalias metadata
1584 // to remove them.
1585
1586 SmallVector<Instruction *, 4> LifetimeMarkers;
1587 SmallPtrSet<Instruction *, 4> AAMetadataInstrs;
1588
1589 auto CaptureTrackingWithModRef =
1590 [&](Instruction *AI, function_ref<bool(Instruction *)> ModRefCallback,
1591 bool &AddressCaptured) -> bool {
1592 SmallVector<Instruction *, 8> Worklist;
1593 Worklist.push_back(AI);
1594 unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
1595 Worklist.reserve(MaxUsesToExplore);
1596 SmallPtrSet<const Use *, 20> Visited;
1597 while (!Worklist.empty()) {
1598 Instruction *I = Worklist.pop_back_val();
1599 for (const Use &U : I->uses()) {
1600 auto *UI = cast<Instruction>(U.getUser());
1601
1602 if (Visited.size() >= MaxUsesToExplore) {
1603 LLVM_DEBUG(
1604 dbgs()
1605 << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
1606 return false;
1607 }
1608 if (!Visited.insert(&U).second)
1609 continue;
1610 UseCaptureInfo CI = DetermineUseCaptureKind(U, AI);
1612 return false;
1613 AddressCaptured |= capturesAddress(CI.UseCC);
1614
1615 if (UI->mayReadOrWriteMemory()) {
1616 if (UI->isLifetimeStartOrEnd()) {
1617 // We note the locations of these intrinsic calls so that we can
1618 // delete them later if the optimization succeeds, this is safe
1619 // since both llvm.lifetime.start and llvm.lifetime.end intrinsics
1620 // practically fill all the bytes of the alloca with an undefined
1621 // value, although conceptually marked as alive/dead.
1622 LifetimeMarkers.push_back(UI);
1623 continue;
1624 }
1625 AAMetadataInstrs.insert(UI);
1626
1627 if (!ModRefCallback(UI))
1628 return false;
1629 }
1630
1631 if (capturesAnything(CI.ResultCC)) {
1632 Worklist.push_back(UI);
1633 continue;
1634 }
1635 }
1636 }
1637 return true;
1638 };
1639
1640 // Check that dest alloca has no Mod/Ref, from the alloca to the Store. And
1641 // collect modref inst for the reachability check.
1642 ModRefInfo DestModRef = ModRefInfo::NoModRef;
1643 MemoryLocation DestLoc(DestAlloca, LocationSize::precise(*DestSize));
1644 SmallVector<BasicBlock *, 8> ReachabilityWorklist;
1645 auto DestModRefCallback = [&](Instruction *UI) -> bool {
1646 // We don't care about the store itself.
1647 if (UI == Store)
1648 return true;
1649 ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
1650 DestModRef |= Res;
1651 if (isModOrRefSet(Res)) {
1652 // Instructions reachability checks.
1653 // FIXME: adding the Instruction version isPotentiallyReachableFromMany on
1654 // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
1655 if (UI->getParent() == Store->getParent()) {
1656 // The same block case is special because it's the only time we're
1657 // looking within a single block to see which instruction comes first.
1658 // Once we start looking at multiple blocks, the first instruction of
1659 // the block is reachable, so we only need to determine reachability
1660 // between whole blocks.
1661 BasicBlock *BB = UI->getParent();
1662
1663 // If A comes before B, then B is definitively reachable from A.
1664 if (UI->comesBefore(Store))
1665 return false;
1666
1667 // If the user's parent block is entry, no predecessor exists.
1668 if (BB->isEntryBlock())
1669 return true;
1670
1671 // Otherwise, continue doing the normal per-BB CFG walk.
1672 ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
1673 } else {
1674 ReachabilityWorklist.push_back(UI->getParent());
1675 }
1676 }
1677 return true;
1678 };
1679
1680 bool DestAddressCaptured = false;
1681 if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback,
1682 DestAddressCaptured))
1683 return false;
1684 // Bailout if Dest may have any ModRef before Store.
1685 if (!ReachabilityWorklist.empty() &&
1686 isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
1687 nullptr, DT, nullptr))
1688 return false;
1689
1690 // Check that, from after the Load to the end of the BB,
1691 // - if the dest has any Mod, src has no Ref, and
1692 // - if the dest has any Ref, src has no Mod except full-sized lifetimes
1693 // Where:
1694 // - src is defined as the memory from max(SrcAlloca, SrcPtr minus
1695 // dest_offset) to min(dest_size, SrcSize minus SrcOffset)
1696 // - dest_offset and dest_size could be computed by DestModRefCallback
1697 // to be the bounds of the first and last mod region, and which is at
1698 // least as large as DestOffset to DestSize, and at most as large as
1699 // SrcAlloca to SrcSize.
1700 // - Currently DestOffset==0 and DestSize==Size, so this math is simplified.
1701 MemoryLocation SrcLoc(SrcPtr, LocationSize::precise(Size));
1702
1703 auto SrcModRefCallback = [&](Instruction *UI) -> bool {
1704 // Any ModRef post-dominated by Load doesn't matter, also Load and Store
1705 // themselves can be ignored.
1706 if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
1707 return true;
1708 ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
1709 if ((isModSet(DestModRef) && isRefSet(Res)) ||
1710 (isRefSet(DestModRef) && isModSet(Res)))
1711 return false;
1712
1713 return true;
1714 };
1715
1716 bool SrcAddressCaptured = false;
1717 if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback,
1718 SrcAddressCaptured))
1719 return false;
1720
1721 // If both the source and destination address are captured, the fact that they
1722 // are no longer two separate allocations may be observed.
1723 if (DestAddressCaptured && SrcAddressCaptured)
1724 return false;
1725
1726 // We can now do the transformation. First move the Src if it was after Dest.
1727 if (MoveSrc)
1728 SrcAlloca->moveBefore(DestAlloca->getIterator());
1729
1730 // Align the allocas appropriately.
1731 SrcAlloca->setAlignment(
1732 std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
1733
1734 // Size the allocas appropriately.
1735 if (*SrcSize != *DestSize) {
1736 // Only possible if both sizes are fixed (due to earlier check)
1737 // Set Src to the type and array size of Dest if Dest was larger
1738 if (DestSize->getFixedValue() > SrcSize->getFixedValue()) {
1739 SrcAlloca->setAllocatedType(DestAlloca->getAllocatedType());
1740 SrcAlloca->setOperand(0, DestAlloca->getArraySize());
1741 }
1742 }
1743
1744 // Merge the two allocas.
1745 Value *NewDestPtr = SrcAlloca;
1746 if (*SrcOffset != *DestOffset) {
1747 IRBuilder<> Builder(DestAlloca);
1748 NewDestPtr = Builder.CreateInBoundsPtrAdd(
1749 SrcAlloca, Builder.getInt64(*SrcOffset - *DestOffset));
1750 }
1751 DestAlloca->replaceAllUsesWith(NewDestPtr);
1752 eraseInstruction(DestAlloca);
1753
1754 // Drop metadata on the source alloca.
1755 SrcAlloca->dropUnknownNonDebugMetadata();
1756
1757 // TODO: Reconstruct merged lifetime markers.
1758 // Remove all other lifetime markers. if the original lifetime intrinsics
1759 // exists.
1760 if (!LifetimeMarkers.empty()) {
1761 for (Instruction *I : LifetimeMarkers)
1763 }
1764
1765 // As this transformation can cause memory accesses that didn't previously
1766 // alias to begin to alias one another, we remove !alias.scope, !noalias,
1767 // !tbaa and !tbaa_struct metadata from any uses of either alloca.
1768 // This is conservative, but more precision doesn't seem worthwhile
1769 // right now.
1770 for (Instruction *I : AAMetadataInstrs) {
1771 I->setMetadata(LLVMContext::MD_alias_scope, nullptr);
1772 I->setMetadata(LLVMContext::MD_noalias, nullptr);
1773 I->setMetadata(LLVMContext::MD_tbaa, nullptr);
1774 I->setMetadata(LLVMContext::MD_tbaa_struct, nullptr);
1775 }
1776
1777 LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
1778 NumStackMove++;
1779 return true;
1780}
1781
1782static bool isZeroSize(Value *Size) {
1783 if (auto *I = dyn_cast<Instruction>(Size))
1784 if (auto *Res = simplifyInstruction(I, I->getDataLayout()))
1785 Size = Res;
1786 // Treat undef/poison size like zero.
1787 if (auto *C = dyn_cast<Constant>(Size))
1788 return isa<UndefValue>(C) || C->isNullValue();
1789 return false;
1790}
1791
1792/// Perform simplification of memcpy's. If we have memcpy A
1793/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1794/// B to be a memcpy from X to Z (or potentially a memmove, depending on
1795/// circumstances). This allows later passes to remove the first memcpy
1796/// altogether.
1797bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1798 // We can only optimize non-volatile memcpy's.
1799 if (M->isVolatile())
1800 return false;
1801
1802 // If the source and destination of the memcpy are the same, then zap it.
1803 if (M->getSource() == M->getDest()) {
1804 ++BBI;
1806 return true;
1807 }
1808
1809 // If the size is zero, remove the memcpy.
1810 if (isZeroSize(M->getLength())) {
1811 ++BBI;
1813 return true;
1814 }
1815
1816 MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1817 if (!MA)
1818 // Degenerate case: memcpy marked as not accessing memory.
1819 return false;
1820
1821 // If copying from a constant, try to turn the memcpy into a memset.
1822 if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(M->getSource())))
1823 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1824 if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1825 M->getDataLayout())) {
1826 IRBuilder<> Builder(M);
1827 Instruction *NewM = Builder.CreateMemSet(
1828 M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
1829 auto *LastDef = cast<MemoryDef>(MA);
1830 auto *NewAccess =
1831 MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1832 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1833
1835 ++NumCpyToSet;
1836 return true;
1837 }
1838
1839 BatchAAResults BAA(*AA, EEA);
1840 // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
1841 MemoryAccess *AnyClobber = MA->getDefiningAccess();
1842 MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1843 const MemoryAccess *DestClobber =
1844 MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
1845
1846 // Try to turn a partially redundant memset + memcpy into
1847 // smaller memset + memcpy. We don't need the memcpy size for this.
1848 // The memcpy must post-dom the memset, so limit this to the same basic
1849 // block. A non-local generalization is likely not worthwhile.
1850 if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1851 if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1852 if (DestClobber->getBlock() == M->getParent())
1853 if (processMemSetMemCpyDependence(M, MDep, BAA))
1854 return true;
1855
1856 MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1857 AnyClobber, MemoryLocation::getForSource(M), BAA);
1858
1859 // There are five possible optimizations we can do for memcpy:
1860 // a) memcpy-memcpy xform which exposes redundance for DSE.
1861 // b) call-memcpy xform for return slot optimization.
1862 // c) memcpy from freshly alloca'd space or space that has just started
1863 // its lifetime copies undefined data, and we can therefore eliminate
1864 // the memcpy in favor of the data that was already at the destination.
1865 // d) memcpy from a just-memset'd source can be turned into memset.
1866 // e) elimination of memcpy via stack-move optimization.
1867 if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1868 if (Instruction *MI = MD->getMemoryInst()) {
1869 if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1870 if (auto *C = dyn_cast<CallInst>(MI)) {
1871 if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1872 TypeSize::getFixed(CopySize->getZExtValue()),
1873 M->getDestAlign().valueOrOne(), BAA,
1874 [C]() -> CallInst * { return C; })) {
1875 LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1876 << " call: " << *C << "\n"
1877 << " memcpy: " << *M << "\n");
1879 ++NumMemCpyInstr;
1880 return true;
1881 }
1882 }
1883 }
1884 if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1885 if (processMemCpyMemCpyDependence(M, MDep, BAA))
1886 return true;
1887 if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1888 if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
1889 LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1891 ++NumCpyToSet;
1892 return true;
1893 }
1894 }
1895 }
1896
1897 if (hasUndefContents(MSSA, BAA, M->getSource(), MD)) {
1898 LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1900 ++NumMemCpyInstr;
1901 return true;
1902 }
1903 }
1904
1905 // If the transfer is from a stack slot to a stack slot, then we may be able
1906 // to perform the stack-move optimization. See the comments in
1907 // performStackMoveOptzn() for more details.
1908 ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
1909 if (Len == nullptr)
1910 return false;
1911 if (performStackMoveOptzn(M, M, M->getDest(), M->getSource(),
1912 TypeSize::getFixed(Len->getZExtValue()), BAA)) {
1913 // Avoid invalidating the iterator.
1914 BBI = M->getNextNode()->getIterator();
1916 ++NumMemCpyInstr;
1917 return true;
1918 }
1919
1920 return false;
1921}
1922
1923/// Memmove calls with overlapping src/dest buffers that come after a memset may
1924/// be removed.
1925bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
1926 const auto &DL = M->getDataLayout();
1927 MemoryUseOrDef *MemMoveAccess = MSSA->getMemoryAccess(M);
1928 if (!MemMoveAccess)
1929 return false;
1930
1931 // The memmove is of form memmove(x, x + A, B).
1932 MemoryLocation SourceLoc = MemoryLocation::getForSource(M);
1933 auto *MemMoveSourceOp = M->getSource();
1934 auto *Source = dyn_cast<GEPOperator>(MemMoveSourceOp);
1935 if (!Source)
1936 return false;
1937
1938 APInt Offset(DL.getIndexTypeSizeInBits(Source->getType()), 0);
1939 LocationSize MemMoveLocSize = SourceLoc.Size;
1940 if (Source->getPointerOperand() != M->getDest() ||
1941 !MemMoveLocSize.hasValue() ||
1942 !Source->accumulateConstantOffset(DL, Offset) || Offset.isNegative()) {
1943 return false;
1944 }
1945
1946 uint64_t MemMoveSize = MemMoveLocSize.getValue();
1947 LocationSize TotalSize =
1948 LocationSize::precise(Offset.getZExtValue() + MemMoveSize);
1949 MemoryLocation CombinedLoc(M->getDest(), TotalSize);
1950
1951 // The first dominating clobbering MemoryAccess for the combined location
1952 // needs to be a memset.
1953 BatchAAResults BAA(*AA);
1954 MemoryAccess *FirstDef = MemMoveAccess->getDefiningAccess();
1955 auto *DestClobber = dyn_cast<MemoryDef>(
1956 MSSA->getWalker()->getClobberingMemoryAccess(FirstDef, CombinedLoc, BAA));
1957 if (!DestClobber)
1958 return false;
1959
1960 auto *MS = dyn_cast_or_null<MemSetInst>(DestClobber->getMemoryInst());
1961 if (!MS)
1962 return false;
1963
1964 // Memset length must be sufficiently large.
1965 auto *MemSetLength = dyn_cast<ConstantInt>(MS->getLength());
1966 if (!MemSetLength || MemSetLength->getZExtValue() < MemMoveSize)
1967 return false;
1968
1969 // The destination buffer must have been memset'd.
1970 if (!BAA.isMustAlias(MS->getDest(), M->getDest()))
1971 return false;
1972
1973 return true;
1974}
1975
1976/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1977/// not to alias.
1978bool MemCpyOptPass::processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI) {
1979 // See if the source could be modified by this memmove potentially.
1980 if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M)))) {
1981 // On the off-chance the memmove clobbers src with previously memset'd
1982 // bytes, the memmove may be redundant.
1983 if (!M->isVolatile() && isMemMoveMemSetDependency(M)) {
1984 LLVM_DEBUG(dbgs() << "Removed redundant memmove.\n");
1985 ++BBI;
1987 ++NumMemMoveInstr;
1988 return true;
1989 }
1990 return false;
1991 }
1992
1993 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1994 << "\n");
1995
1996 // If not, then we know we can transform this.
1997 Type *ArgTys[3] = {M->getRawDest()->getType(), M->getRawSource()->getType(),
1998 M->getLength()->getType()};
1999 M->setCalledFunction(Intrinsic::getOrInsertDeclaration(
2000 M->getModule(), Intrinsic::memcpy, ArgTys));
2001
2002 // For MemorySSA nothing really changes (except that memcpy may imply stricter
2003 // aliasing guarantees).
2004
2005 ++NumMoveToCpy;
2006 return true;
2007}
2008
2009/// This is called on every byval argument in call sites.
2010bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
2011 const DataLayout &DL = CB.getDataLayout();
2012 // Find out what feeds this byval argument.
2013 Value *ByValArg = CB.getArgOperand(ArgNo);
2014 Type *ByValTy = CB.getParamByValType(ArgNo);
2015 TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
2016 MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
2017 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
2018 if (!CallAccess)
2019 return false;
2020 MemCpyInst *MDep = nullptr;
2021 BatchAAResults BAA(*AA, EEA);
2022 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
2023 CallAccess->getDefiningAccess(), Loc, BAA);
2024 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
2025 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
2026
2027 // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
2028 // a memcpy, see if we can byval from the source of the memcpy instead of the
2029 // result.
2030 if (!MDep || MDep->isVolatile() ||
2031 ByValArg->stripPointerCasts() != MDep->getDest())
2032 return false;
2033
2034 // The length of the memcpy must be larger or equal to the size of the byval.
2035 auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
2036 if (!C1 || !TypeSize::isKnownGE(
2037 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
2038 return false;
2039
2040 // Get the alignment of the byval. If the call doesn't specify the alignment,
2041 // then it is some target specific value that we can't know.
2042 MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
2043 if (!ByValAlign)
2044 return false;
2045
2046 // If it is greater than the memcpy, then we check to see if we can force the
2047 // source of the memcpy to the alignment we need. If we fail, we bail out.
2048 MaybeAlign MemDepAlign = MDep->getSourceAlign();
2049 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
2050 getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
2051 DT) < *ByValAlign)
2052 return false;
2053
2054 // The type of the memcpy source must match the byval argument
2055 if (MDep->getSource()->getType() != ByValArg->getType())
2056 return false;
2057
2058 // Verify that the copied-from memory doesn't change in between the memcpy and
2059 // the byval call.
2060 // memcpy(a <- b)
2061 // *b = 42;
2062 // foo(*a)
2063 // It would be invalid to transform the second memcpy into foo(*b).
2064 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
2065 MSSA->getMemoryAccess(MDep), CallAccess))
2066 return false;
2067
2068 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
2069 << " " << *MDep << "\n"
2070 << " " << CB << "\n");
2071
2072 // Otherwise we're good! Update the byval argument.
2073 combineAAMetadata(&CB, MDep);
2074 CB.setArgOperand(ArgNo, MDep->getSource());
2075 ++NumMemCpyInstr;
2076 return true;
2077}
2078
2079/// This is called on memcpy dest pointer arguments attributed as immutable
2080/// during call. Try to use memcpy source directly if all of the following
2081/// conditions are satisfied.
2082/// 1. The memcpy dst is neither modified during the call nor captured by the
2083/// call.
2084/// 2. The memcpy dst is an alloca with known alignment & size.
2085/// 2-1. The memcpy length == the alloca size which ensures that the new
2086/// pointer is dereferenceable for the required range
2087/// 2-2. The src pointer has alignment >= the alloca alignment or can be
2088/// enforced so.
2089/// 3. The memcpy dst and src is not modified between the memcpy and the call.
2090/// (if MSSA clobber check is safe.)
2091/// 4. The memcpy src is not modified during the call. (ModRef check shows no
2092/// Mod.)
2093bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
2094 BatchAAResults BAA(*AA, EEA);
2095 Value *ImmutArg = CB.getArgOperand(ArgNo);
2096
2097 // 1. Ensure passed argument is immutable during call.
2098 if (!CB.doesNotCapture(ArgNo))
2099 return false;
2100
2101 // We know that the argument is readonly at this point, but the function
2102 // might still modify the same memory through a different pointer. Exclude
2103 // this either via noalias, or alias analysis.
2104 if (!CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
2105 isModSet(
2107 return false;
2108
2109 const DataLayout &DL = CB.getDataLayout();
2110
2111 // 2. Check that arg is alloca
2112 // TODO: Even if the arg gets back to branches, we can remove memcpy if all
2113 // the alloca alignments can be enforced to source alignment.
2114 auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
2115 if (!AI)
2116 return false;
2117
2118 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
2119 // Can't handle unknown size alloca.
2120 // (e.g. Variable Length Array, Scalable Vector)
2121 if (!AllocaSize || AllocaSize->isScalable())
2122 return false;
2123 MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
2124 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
2125 if (!CallAccess)
2126 return false;
2127
2128 MemCpyInst *MDep = nullptr;
2129 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
2130 CallAccess->getDefiningAccess(), Loc, BAA);
2131 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
2132 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
2133
2134 // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
2135 // a memcpy, check that the arg equals the memcpy dest.
2136 if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
2137 return false;
2138
2139 // The type of the memcpy source must match the immut argument
2140 if (MDep->getSource()->getType() != ImmutArg->getType())
2141 return false;
2142
2143 // 2-1. The length of the memcpy must be equal to the size of the alloca.
2144 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
2145 if (!MDepLen || AllocaSize != MDepLen->getValue())
2146 return false;
2147
2148 // 2-2. the memcpy source align must be larger than or equal the alloca's
2149 // align. If not so, we check to see if we can force the source of the memcpy
2150 // to the alignment we need. If we fail, we bail out.
2151 Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
2152 Align AllocaAlign = AI->getAlign();
2153 if (MemDepAlign < AllocaAlign &&
2154 getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
2155 DT) < AllocaAlign)
2156 return false;
2157
2158 // 3. Verify that the source doesn't change in between the memcpy and
2159 // the call.
2160 // memcpy(a <- b)
2161 // *b = 42;
2162 // foo(*a)
2163 // It would be invalid to transform the second memcpy into foo(*b).
2164 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
2165 MSSA->getMemoryAccess(MDep), CallAccess))
2166 return false;
2167
2168 // 4. The memcpy src must not be modified during the call.
2170 return false;
2171
2172 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
2173 << " " << *MDep << "\n"
2174 << " " << CB << "\n");
2175
2176 // Otherwise we're good! Update the immut argument.
2177 combineAAMetadata(&CB, MDep);
2178 CB.setArgOperand(ArgNo, MDep->getSource());
2179 ++NumMemCpyInstr;
2180 return true;
2181}
2182
2183/// Executes one iteration of MemCpyOptPass.
2184bool MemCpyOptPass::iterateOnFunction(Function &F) {
2185 bool MadeChange = false;
2186
2187 // Walk all instruction in the function.
2188 for (BasicBlock &BB : F) {
2189 // Skip unreachable blocks. For example processStore assumes that an
2190 // instruction in a BB can't be dominated by a later instruction in the
2191 // same BB (which is a scenario that can happen for an unreachable BB that
2192 // has itself as a predecessor).
2193 if (!DT->isReachableFromEntry(&BB))
2194 continue;
2195
2196 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
2197 // Avoid invalidating the iterator.
2198 Instruction *I = &*BI++;
2199
2200 bool RepeatInstruction = false;
2201
2202 if (auto *SI = dyn_cast<StoreInst>(I))
2203 MadeChange |= processStore(SI, BI);
2204 else if (auto *M = dyn_cast<MemSetInst>(I))
2205 RepeatInstruction = processMemSet(M, BI);
2206 else if (auto *M = dyn_cast<MemCpyInst>(I))
2207 RepeatInstruction = processMemCpy(M, BI);
2208 else if (auto *M = dyn_cast<MemMoveInst>(I))
2209 RepeatInstruction = processMemMove(M, BI);
2210 else if (auto *CB = dyn_cast<CallBase>(I)) {
2211 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
2212 if (CB->isByValArgument(i))
2213 MadeChange |= processByValArgument(*CB, i);
2214 else if (CB->onlyReadsMemory(i))
2215 MadeChange |= processImmutArgument(*CB, i);
2216 }
2217 }
2218
2219 // Reprocess the instruction if desired.
2220 if (RepeatInstruction) {
2221 if (BI != BB.begin())
2222 --BI;
2223 MadeChange = true;
2224 }
2225 }
2226 }
2227
2228 return MadeChange;
2229}
2230
2232 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2233 auto *AA = &AM.getResult<AAManager>(F);
2234 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
2235 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
2236 auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
2237 auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
2238
2239 bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
2240 if (!MadeChange)
2241 return PreservedAnalyses::all();
2242
2246 return PA;
2247}
2248
2249bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
2250 AliasAnalysis *AA_, AssumptionCache *AC_,
2251 DominatorTree *DT_, PostDominatorTree *PDT_,
2252 MemorySSA *MSSA_) {
2253 bool MadeChange = false;
2254 TLI = TLI_;
2255 AA = AA_;
2256 AC = AC_;
2257 DT = DT_;
2258 PDT = PDT_;
2259 MSSA = MSSA_;
2260 MemorySSAUpdater MSSAU_(MSSA_);
2261 MSSAU = &MSSAU_;
2262 EarliestEscapeAnalysis EEA_(*DT);
2263 EEA = &EEA_;
2264
2265 while (true) {
2266 if (!iterateOnFunction(F))
2267 break;
2268 MadeChange = true;
2269 }
2270
2271 if (VerifyMemorySSA)
2272 MSSA_->verifyMemorySSA();
2273
2274 return MadeChange;
2275}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseSet and SmallDenseSet classes.
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
#define DEBUG_TYPE
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition LICM.cpp:1457
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start, Instruction *End)
static bool isZeroSize(Value *Size)
static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V, MemoryDef *Def)
Determine whether the pointer V had only undefined content (due to Def), either because it was freshl...
static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc, const MemoryUseOrDef *Start, const MemoryUseOrDef *End, Instruction **SkippedLifetimeStart=nullptr)
static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy, MemIntrinsic *MemSrc, BatchAAResults &BAA)
static cl::opt< bool > EnableMemCpyOptWithoutLibcalls("enable-memcpyopt-without-libcalls", cl::Hidden, cl::desc("Enable memcpyopt even when libcalls are disabled"))
static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA, MemoryLocation Loc, const MemoryUseOrDef *Start, const MemoryUseOrDef *End)
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
static void addRange(SmallVectorImpl< ConstantInt * > &EndPoints, ConstantInt *Low, ConstantInt *High)
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
This file contains the declarations for profiling metadata utility functions.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
A manager for alias analyses.
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
void setAllocatedType(Type *Ty)
for use only in special circumstances that need to generically transform a whole instruction (eg: IR ...
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
LLVM_ABI bool isEntryBlock() const
Return true if this is the entry block of the containing function.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
ModRefInfo callCapturesBefore(const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT)
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
bool doesNotCapture(unsigned OpNo) const
Determine whether this data operand is not captured.
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
MaybeAlign getParamAlign(unsigned ArgNo) const
Extract the alignment for a call or parameter (0=unknown).
bool onlyReadsMemory(unsigned OpNo) const
Type * getParamByValType(unsigned ArgNo) const
Extract the byval type for a call or parameter.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
unsigned arg_size() const
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
Context-sensitive CaptureAnalysis provider, which computes and caches the earliest common dominator c...
LLVM_ABI void mergeDIAssignID(ArrayRef< const Instruction * > SourceInstructions)
Merge the DIAssignID metadata from this instruction and those attached to instructions in SourceInstr...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void dropUnknownNonDebugMetadata(ArrayRef< unsigned > KnownIDs={})
Drop all unknown metadata except for debug locations.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Value * getPointerOperand()
bool isSimple() const
Align getAlign() const
Return the alignment of the access that is being performed.
bool hasValue() const
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
This class wraps the llvm.memcpy intrinsic.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Value * getLength() const
Value * getRawDest() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
This is the common base class for memset/memcpy/memmove.
bool isVolatile() const
Value * getValue() const
Value * getRawSource() const
Return the arguments to the instruction.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it,...
BasicBlock * getBlock() const
Definition MemorySSA.h:162
AllAccessType::self_iterator getIterator()
Get the iterators for the all access list and the defs only list We default to the all access list.
Definition MemorySSA.h:181
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition MemorySSA.h:371
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static LLVM_ABI MemoryLocation getForSource(const MemTransferInst *MTI)
Return a location representing the source of a memory transfer.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
static LLVM_ABI MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
An analysis that produces MemorySSA for a function.
Definition MemorySSA.h:922
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition MemorySSA.h:1035
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition MemorySSA.h:702
LLVM_ABI bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
LLVM_ABI void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
LLVM_ABI MemorySSAWalker * getWalker()
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition MemorySSA.h:720
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition MemorySSA.h:740
Class that has the common methods + fields of memory uses/defs.
Definition MemorySSA.h:250
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition MemorySSA.h:260
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
Definition MemorySSA.h:257
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition Module.h:281
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void reserve(size_type N)
typename SuperClass::const_iterator const_iterator
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
LLVM_ABI unsigned getIntegerBitWidth() const
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:709
bool use_empty() const
Definition Value.h:346
LLVM_ABI std::optional< int64_t > getPointerOffsetFrom(const Value *Other, const DataLayout &DL) const
If this ptr is provably equal to Other plus a constant offset, return that offset in bytes.
Definition Value.cpp:1059
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
const ParentTy * getParent() const
Definition ilist_node.h:34
reverse_self_iterator getReverseIterator()
Definition ilist_node.h:126
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
Changed
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
Abstract Attribute helper functions.
Definition Attributor.h:165
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ User
could "use" a pointer
bool empty() const
Definition BasicBlock.h:101
iterator end() const
Definition BasicBlock.h:89
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
LLVM_ABI iterator begin() const
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool capturesAddress(CaptureComponents CC)
Definition ModRef.h:387
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
scope_exit(Callable) -> scope_exit< Callable >
LLVM_ABI bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, Align Alignment, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater or equal than requested.
Definition Loads.cpp:230
auto partition_point(R &&Range, Predicate P)
Binary search for the first iterator in a range where a predicate is false.
Definition STLExtras.h:2128
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
auto cast_or_null(const Y &Val)
Definition Casting.h:714
LLVM_ABI unsigned getDefaultMaxUsesToExploreForCaptureTracking()
getDefaultMaxUsesToExploreForCaptureTracking - Return default value of the maximal number of uses to ...
LLVM_ABI bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, const Instruction *I, const DominatorTree *DT, bool IncludeI=false, unsigned MaxUsesToExplore=0, const LoopInfo *LI=nullptr)
PointerMayBeCapturedBefore - Return true if this pointer value may be captured by the enclosing funct...
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
LLVM_ABI Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition Local.cpp:1581
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool isModOrRefSet(const ModRefInfo MRI)
Definition ModRef.h:43
LLVM_ABI bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition ModRef.h:28
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition MemorySSA.cpp:84
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
DWARFExpression::Operation Op
LLVM_ABI bool isPotentiallyReachableFromMany(SmallVectorImpl< BasicBlock * > &Worklist, const BasicBlock *StopBB, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr, const CycleInfo *CI=nullptr)
Determine whether there is at least one path from a block in 'Worklist' to 'StopBB' without passing t...
Definition CFG.cpp:293
LLVM_ABI bool isIdentifiedFunctionLocal(const Value *V)
Return true if V is umabigously identified at the function-level.
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI void combineAAMetadata(Instruction *K, const Instruction *J)
Combine metadata of two instructions, where instruction J is a memory access that has been merged int...
Definition Local.cpp:3136
bool capturesAnything(CaptureComponents CC)
Definition ModRef.h:379
LLVM_ABI UseCaptureInfo DetermineUseCaptureKind(const Use &U, const Value *Base)
Determine what kind of capture behaviour U may exhibit.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
bool capturesAnyProvenance(CaptureComponents CC)
Definition ModRef.h:400
bool isRefSet(const ModRefInfo MRI)
Definition ModRef.h:52
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
CaptureComponents UseCC
Components captured by this use.
CaptureComponents ResultCC
Components captured by the return value of the user of this Use.