MemCpyOptimizer.cpp
1//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs various transformations related to eliminating memcpy
10// calls, or transforming sets of stores into memset's.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Statistic.h"
24#include "llvm/Analysis/Loads.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/Dominators.h"
35#include "llvm/IR/Function.h"
37#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Instruction.h"
42#include "llvm/IR/Intrinsics.h"
43#include "llvm/IR/LLVMContext.h"
44#include "llvm/IR/Module.h"
45#include "llvm/IR/PassManager.h"
46#include "llvm/IR/Type.h"
47#include "llvm/IR/User.h"
48#include "llvm/IR/Value.h"
50#include "llvm/Pass.h"
52#include "llvm/Support/Debug.h"
57#include <algorithm>
58#include <cassert>
59#include <cstdint>
60#include <optional>
61
62using namespace llvm;
63
64#define DEBUG_TYPE "memcpyopt"
65
67 "enable-memcpyopt-without-libcalls", cl::Hidden,
68 cl::desc("Enable memcpyopt even when libcalls are disabled"));
69
70STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
71STATISTIC(NumMemSetInfer, "Number of memsets inferred");
72STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
73STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
74STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
75
76namespace {
77
78/// Represents a range of memset'd bytes with the ByteVal value.
79/// This allows us to analyze stores like:
80/// store 0 -> P+1
81/// store 0 -> P+0
82/// store 0 -> P+3
83/// store 0 -> P+2
84/// which sometimes happens with stores to arrays of structs etc. When we see
85/// the first store, we make a range [1, 2). The second store extends the range
86/// to [0, 2). The third makes a new range [3, 4). The fourth store joins the
87/// two ranges into [0, 4) which is memset'able.
88struct MemsetRange {
89 // Start/End - A semi range that describes the span that this range covers.
90 // The range is closed at the start and open at the end: [Start, End).
91 int64_t Start, End;
92
93 /// StartPtr - The getelementptr instruction that points to the start of the
94 /// range.
95 Value *StartPtr;
96
97 /// Alignment - The known alignment of the first store.
98 MaybeAlign Alignment;
99
100 /// TheStores - The actual stores that make up this range.
101 SmallVector<Instruction *, 16> TheStores;
102
103 bool isProfitableToUseMemset(const DataLayout &DL) const;
104};
105
106} // end anonymous namespace
107
108bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
109 // If we found 4 or more stores to merge, or at least 16 bytes, use memset.
110 if (TheStores.size() >= 4 || End-Start >= 16) return true;
111
112 // If there is nothing to merge, don't do anything.
113 if (TheStores.size() < 2) return false;
114
115 // If any of the stores are a memset, then it is always good to extend the
116 // memset.
117 for (Instruction *SI : TheStores)
118 if (!isa<StoreInst>(SI))
119 return true;
120
121 // Assume that the code generator is capable of merging pairs of stores
122 // together if it wants to.
123 if (TheStores.size() == 2) return false;
124
125 // If we have fewer than 8 stores, it can still be worthwhile to do this.
126 // For example, merging 4 i8 stores into an i32 store is useful almost always.
127 // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
128 // memset will be split into 2 32-bit stores anyway) and doing so can
129 // pessimize the llvm optimizer.
130 //
131 // Since we don't have perfect knowledge here, make some assumptions: assume
132 // the maximum GPR width is the same size as the largest legal integer
133 // size. If so, check to see whether we will end up actually reducing the
134 // number of stores used.
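  // For illustration (hypothetical numbers): on a target whose largest legal
  // integer is 32 bits, three stores (an i16 and two i8s) covering 4 contiguous
  // bytes give Bytes = 4, NumPointerStores = 1 and NumByteStores = 0; since
  // 3 > 1, the memset is considered profitable.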
135 unsigned Bytes = unsigned(End-Start);
136 unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
137 if (MaxIntSize == 0)
138 MaxIntSize = 1;
139 unsigned NumPointerStores = Bytes / MaxIntSize;
140
141 // Assume the remaining bytes if any are done a byte at a time.
142 unsigned NumByteStores = Bytes % MaxIntSize;
143
144 // If we will reduce the # stores (according to this heuristic), do the
145 // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
146 // etc.
147 return TheStores.size() > NumPointerStores+NumByteStores;
148}
149
150namespace {
151
152class MemsetRanges {
153 using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
154
155 /// A sorted list of the memset ranges.
156 SmallVector<MemsetRange, 8> Ranges;
157
158 const DataLayout &DL;
159
160public:
161 MemsetRanges(const DataLayout &DL) : DL(DL) {}
162
163 using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
164
165 const_iterator begin() const { return Ranges.begin(); }
166 const_iterator end() const { return Ranges.end(); }
167 bool empty() const { return Ranges.empty(); }
168
169 void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
170 if (auto *SI = dyn_cast<StoreInst>(Inst))
171 addStore(OffsetFromFirst, SI);
172 else
173 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
174 }
175
176 void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
177 TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
178 assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
179 addRange(OffsetFromFirst, StoreSize.getFixedValue(),
180 SI->getPointerOperand(), SI->getAlign(), SI);
181 }
182
183 void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
184 int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
185 addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
186 }
187
188 void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
189 Instruction *Inst);
190};
191
192} // end anonymous namespace
193
194/// Add a new store to the MemsetRanges data structure. This adds a
195/// new range for the specified store at the specified offset, merging into
196/// existing ranges as appropriate.
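/// For illustration (hypothetical offsets): with existing ranges [0, 4) and
/// [8, 12), adding a 4-byte store at offset 4 first extends the first range to
/// [0, 8) and then folds the following range in, leaving a single [0, 12).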
197void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
198 MaybeAlign Alignment, Instruction *Inst) {
199 int64_t End = Start+Size;
200
201 range_iterator I = partition_point(
202 Ranges, [=](const MemsetRange &O) { return O.End < Start; });
203
204 // We now know that I == E, in which case we didn't find anything to merge
205 // with, or that Start <= I->End. If End < I->Start or I == E, then we need
206 // to insert a new range. Handle this now.
207 if (I == Ranges.end() || End < I->Start) {
208 MemsetRange &R = *Ranges.insert(I, MemsetRange());
209 R.Start = Start;
210 R.End = End;
211 R.StartPtr = Ptr;
212 R.Alignment = Alignment;
213 R.TheStores.push_back(Inst);
214 return;
215 }
216
217 // This store overlaps with I, add it.
218 I->TheStores.push_back(Inst);
219
220 // At this point, we may have an interval that completely contains our store.
221 // If so, just add it to the interval and return.
222 if (I->Start <= Start && I->End >= End)
223 return;
224
225 // Now we know that Start <= I->End and End >= I->Start so the range overlaps
226 // but is not entirely contained within the range.
227
228 // See if this store extends the start of the range. In this case, it couldn't
229 // possibly cause it to join the prior range, because otherwise we would have
230 // stopped on *it*.
231 if (Start < I->Start) {
232 I->Start = Start;
233 I->StartPtr = Ptr;
234 I->Alignment = Alignment;
235 }
236
237 // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
238 // is in or right at the end of I), and that End >= I->Start. Extend I out to
239 // End.
240 if (End > I->End) {
241 I->End = End;
242 range_iterator NextI = I;
243 while (++NextI != Ranges.end() && End >= NextI->Start) {
244 // Merge the range in.
245 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
246 if (NextI->End > I->End)
247 I->End = NextI->End;
248 Ranges.erase(NextI);
249 NextI = I;
250 }
251 }
252}
253
254//===----------------------------------------------------------------------===//
255// MemCpyOptLegacyPass Pass
256//===----------------------------------------------------------------------===//
257
258// Check that V is either not accessible by the caller, or unwinding cannot
259// occur between Start and End.
260static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
261 Instruction *End) {
262 assert(Start->getParent() == End->getParent() && "Must be in same block");
263 // Function can't unwind, so it also can't be visible through unwinding.
264 if (Start->getFunction()->doesNotThrow())
265 return false;
266
267 // Object is not visible on unwind.
268 // TODO: Support RequiresNoCaptureBeforeUnwind case.
269 bool RequiresNoCaptureBeforeUnwind;
270 if (isNotVisibleOnUnwind(getUnderlyingObject(V),
271 RequiresNoCaptureBeforeUnwind) &&
272 !RequiresNoCaptureBeforeUnwind)
273 return false;
274
275 // Check whether there are any unwinding instructions in the range.
276 return any_of(make_range(Start->getIterator(), End->getIterator()),
277 [](const Instruction &I) { return I.mayThrow(); });
278}
279
280void MemCpyOptPass::eraseInstruction(Instruction *I) {
281 MSSAU->removeMemoryAccess(I);
282 I->eraseFromParent();
283}
284
285// Check for mod or ref of Loc between Start and End, excluding both boundaries.
286// Start and End must be in the same block.
287// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
288// intrinsic and store it inside SkippedLifetimeStart.
289static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
290 const MemoryUseOrDef *Start,
291 const MemoryUseOrDef *End,
292 Instruction **SkippedLifetimeStart = nullptr) {
293 assert(Start->getBlock() == End->getBlock() && "Only local supported");
294 for (const MemoryAccess &MA :
295 make_range(++Start->getIterator(), End->getIterator())) {
296 Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
297 if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
298 auto *II = dyn_cast<IntrinsicInst>(I);
299 if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
300 SkippedLifetimeStart && !*SkippedLifetimeStart) {
301 *SkippedLifetimeStart = I;
302 continue;
303 }
304
305 return true;
306 }
307 }
308 return false;
309}
310
311// Check for mod of Loc between Start and End, excluding both boundaries.
312// Start and End can be in different blocks.
313static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
314 MemoryLocation Loc, const MemoryUseOrDef *Start,
315 const MemoryUseOrDef *End) {
316 if (isa<MemoryUse>(End)) {
317 // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
318 // Manually check read accesses between Start and End, if they are in the
319 // same block, for clobbers. Otherwise assume Loc is clobbered.
320 return Start->getBlock() != End->getBlock() ||
321 any_of(
322 make_range(std::next(Start->getIterator()), End->getIterator()),
323 [&AA, Loc](const MemoryAccess &Acc) {
324 if (isa<MemoryUse>(&Acc))
325 return false;
326 Instruction *AccInst =
327 cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
328 return isModSet(AA.getModRefInfo(AccInst, Loc));
329 });
330 }
331
332 // TODO: Only walk until we hit Start.
333 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
334 End->getDefiningAccess(), Loc, AA);
335 return !MSSA->dominates(Clobber, Start);
336}
337
338/// When scanning forward over instructions, we look for some other patterns to
339/// fold away. In particular, this looks for stores to neighboring locations of
340/// memory. If it sees enough consecutive ones, it attempts to merge them
341/// together into a memcpy/memset.
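/// For illustration (hypothetical IR, four adjacent byte stores of the same
/// splat value):
///   store i8 0, ptr %p
///   store i8 0, ptr %p1   ; %p1 = getelementptr i8, ptr %p, i64 1
///   store i8 0, ptr %p2   ; %p2 = getelementptr i8, ptr %p, i64 2
///   store i8 0, ptr %p3   ; %p3 = getelementptr i8, ptr %p, i64 3
/// can, when profitable, be rewritten as
///   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 4, i1 false)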
342Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
343 Value *StartPtr,
344 Value *ByteVal) {
345 const DataLayout &DL = StartInst->getModule()->getDataLayout();
346
347 // We can't track scalable types
348 if (auto *SI = dyn_cast<StoreInst>(StartInst))
349 if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
350 return nullptr;
351
352 // Okay, so we now have a single store of a splattable value. Scan to find
353 // all subsequent stores of the same value to offset from the same pointer.
354 // Join these together into ranges, so we can decide whether contiguous blocks
355 // are stored.
356 MemsetRanges Ranges(DL);
357
358 BasicBlock::iterator BI(StartInst);
359
360 // Keeps track of the last memory use or def before the insertion point for
361 // the new memset. The new MemoryDef for the inserted memsets will be inserted
362 // after MemInsertPoint. It points to either LastMemDef or to the last user
363 // before the insertion point of the memset, if there are any such users.
364 MemoryUseOrDef *MemInsertPoint = nullptr;
365 // Keeps track of the last MemoryDef between StartInst and the insertion point
366 // for the new memset. This will become the defining access of the inserted
367 // memsets.
368 MemoryDef *LastMemDef = nullptr;
369 for (++BI; !BI->isTerminator(); ++BI) {
370 auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
371 MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
372 if (CurrentAcc) {
373 MemInsertPoint = CurrentAcc;
374 if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
375 LastMemDef = CurrentDef;
376 }
377
378 // Calls that only access inaccessible memory do not block merging
379 // accessible stores.
380 if (auto *CB = dyn_cast<CallBase>(BI)) {
381 if (CB->onlyAccessesInaccessibleMemory())
382 continue;
383 }
384
385 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
386 // If the instruction is readnone, ignore it, otherwise bail out. We
387 // don't even allow readonly here because we don't want something like:
388 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
389 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
390 break;
391 continue;
392 }
393
394 if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
395 // If this is a store, see if we can merge it in.
396 if (!NextStore->isSimple()) break;
397
398 Value *StoredVal = NextStore->getValueOperand();
399
400 // Don't convert stores of non-integral pointer types to memsets (which
401 // stores integers).
402 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
403 break;
404
405 // We can't track ranges involving scalable types.
406 if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
407 break;
408
409 // Check to see if this stored value is of the same byte-splattable value.
410 Value *StoredByte = isBytewiseValue(StoredVal, DL);
411 if (isa<UndefValue>(ByteVal) && StoredByte)
412 ByteVal = StoredByte;
413 if (ByteVal != StoredByte)
414 break;
415
416 // Check to see if this store is to a constant offset from the start ptr.
417 std::optional<int64_t> Offset =
418 isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL);
419 if (!Offset)
420 break;
421
422 Ranges.addStore(*Offset, NextStore);
423 } else {
424 auto *MSI = cast<MemSetInst>(BI);
425
426 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
427 !isa<ConstantInt>(MSI->getLength()))
428 break;
429
430 // Check to see if this store is to a constant offset from the start ptr.
431 std::optional<int64_t> Offset =
432 isPointerOffset(StartPtr, MSI->getDest(), DL);
433 if (!Offset)
434 break;
435
436 Ranges.addMemSet(*Offset, MSI);
437 }
438 }
439
440 // If we have no ranges, then we just had a single store with nothing that
441 // could be merged in. This is a very common case of course.
442 if (Ranges.empty())
443 return nullptr;
444
445 // If we had at least one store that could be merged in, add the starting
446 // store as well. We try to avoid this unless there is at least something
447 // interesting as a small compile-time optimization.
448 Ranges.addInst(0, StartInst);
449
450 // If we create any memsets, we put them right before the first instruction that
451 // isn't part of the memset block. This ensures that the memset is dominated
452 // by any addressing instruction needed by the start of the block.
453 IRBuilder<> Builder(&*BI);
454
455 // Now that we have full information about ranges, loop over the ranges and
456 // emit memset's for anything big enough to be worthwhile.
457 Instruction *AMemSet = nullptr;
458 for (const MemsetRange &Range : Ranges) {
459 if (Range.TheStores.size() == 1) continue;
460
461 // If it is profitable to lower this range to memset, do so now.
462 if (!Range.isProfitableToUseMemset(DL))
463 continue;
464
465 // Otherwise, we do want to transform this! Create a new memset.
466 // Get the starting pointer of the block.
467 StartPtr = Range.StartPtr;
468
469 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
470 Range.Alignment);
471 AMemSet->mergeDIAssignID(Range.TheStores);
472
473 LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
474 : Range.TheStores) dbgs()
475 << *SI << '\n';
476 dbgs() << "With: " << *AMemSet << '\n');
477 if (!Range.TheStores.empty())
478 AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
479
480 assert(LastMemDef && MemInsertPoint &&
481 "Both LastMemDef and MemInsertPoint need to be set");
482 auto *NewDef =
483 cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
484 ? MSSAU->createMemoryAccessBefore(
485 AMemSet, LastMemDef, MemInsertPoint)
486 : MSSAU->createMemoryAccessAfter(
487 AMemSet, LastMemDef, MemInsertPoint));
488 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
489 LastMemDef = NewDef;
490 MemInsertPoint = NewDef;
491
492 // Zap all the stores.
493 for (Instruction *SI : Range.TheStores)
494 eraseInstruction(SI);
495
496 ++NumMemSetInfer;
497 }
498
499 return AMemSet;
500}
501
502// This method tries to lift a store instruction before position P.
503// It will lift the store and its arguments, plus anything that
504// may alias with these.
505// The method returns true if it was successful.
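// For illustration (hypothetical): if P is a call that may write to the
// load's source and SI stores through a GEP defined between P and SI, both
// the GEP and SI are hoisted above P so that the load/store pair can later be
// rewritten as a memcpy inserted at P.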
506bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
508 // If the store aliases this position, bail out early.
508 MemoryLocation StoreLoc = MemoryLocation::get(SI);
509 if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
510 return false;
511
512 // Keep track of the arguments of all instructions we plan to lift
513 // so we can make sure to lift them as well if appropriate.
514 DenseSet<Instruction *> Args;
515 auto AddArg = [&](Value *Arg) {
516 auto *I = dyn_cast<Instruction>(Arg);
517 if (I && I->getParent() == SI->getParent()) {
518 // Cannot hoist user of P above P
519 if (I == P) return false;
520 Args.insert(I);
521 }
522 return true;
523 };
524 if (!AddArg(SI->getPointerOperand()))
525 return false;
526
527 // Instructions to lift before P.
528 SmallVector<Instruction *, 8> ToLift{SI};
529
530 // Memory locations of lifted instructions.
531 SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
532
533 // Lifted calls.
534 SmallVector<const CallBase *, 8> Calls;
535
536 const MemoryLocation LoadLoc = MemoryLocation::get(LI);
537
538 for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
539 auto *C = &*I;
540
541 // Make sure hoisting does not perform a store that was not guaranteed to
542 // happen.
543 if (!isGuaranteedToTransferExecutionToSuccessor(C))
544 return false;
545
546 bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));
547
548 bool NeedLift = false;
549 if (Args.erase(C))
550 NeedLift = true;
551 else if (MayAlias) {
552 NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
553 return isModOrRefSet(AA->getModRefInfo(C, ML));
554 });
555
556 if (!NeedLift)
557 NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
558 return isModOrRefSet(AA->getModRefInfo(C, Call));
559 });
560 }
561
562 if (!NeedLift)
563 continue;
564
565 if (MayAlias) {
566 // Since LI is implicitly moved downwards past the lifted instructions,
567 // none of them may modify its source.
568 if (isModSet(AA->getModRefInfo(C, LoadLoc)))
569 return false;
570 else if (const auto *Call = dyn_cast<CallBase>(C)) {
571 // If we can't lift this before P, it's game over.
572 if (isModOrRefSet(AA->getModRefInfo(P, Call)))
573 return false;
574
575 Calls.push_back(Call);
576 } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
577 // If we can't lift this before P, it's game over.
578 auto ML = MemoryLocation::get(C);
579 if (isModOrRefSet(AA->getModRefInfo(P, ML)))
580 return false;
581
582 MemLocs.push_back(ML);
583 } else
584 // We don't know how to lift this instruction.
585 return false;
586 }
587
588 ToLift.push_back(C);
589 for (Value *Op : C->operands())
590 if (!AddArg(Op))
591 return false;
592 }
593
594 // Find MSSA insertion point. Normally P will always have a corresponding
595 // memory access before which we can insert. However, with non-standard AA
596 // pipelines, there may be a mismatch between AA and MSSA, in which case we
597 // will scan for a memory access before P. In either case, we know for sure
598 // that at least the load will have a memory access.
599 // TODO: Simplify this once P will be determined by MSSA, in which case the
600 // discrepancy can no longer occur.
601 MemoryUseOrDef *MemInsertPoint = nullptr;
602 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
603 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
604 } else {
605 const Instruction *ConstP = P;
606 for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
607 ++LI->getReverseIterator())) {
608 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
609 MemInsertPoint = MA;
610 break;
611 }
612 }
613 }
614
615 // We made it, we need to lift.
616 for (auto *I : llvm::reverse(ToLift)) {
617 LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
618 I->moveBefore(P);
619 assert(MemInsertPoint && "Must have found insert point");
620 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
621 MSSAU->moveAfter(MA, MemInsertPoint);
622 MemInsertPoint = MA;
623 }
624 }
625
626 return true;
627}
628
629bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
630 const DataLayout &DL,
631 BasicBlock::iterator &BBI) {
632 if (!LI->isSimple() || !LI->hasOneUse() ||
633 LI->getParent() != SI->getParent())
634 return false;
635
636 auto *T = LI->getType();
637 // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
638 // the corresponding libcalls are not available.
639 // TODO: We should really distinguish between libcall availability and
640 // our ability to introduce intrinsics.
641 if (T->isAggregateType() &&
642 (EnableMemCpyOptWithoutLibcalls ||
643 (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
644 MemoryLocation LoadLoc = MemoryLocation::get(LI);
645
646 // We use alias analysis to check if an instruction may store to
647 // the memory we load from in between the load and the store. If
648 // such an instruction is found, we try to promote there instead
649 // of at the store position.
650 // TODO: Can use MSSA for this.
651 Instruction *P = SI;
652 for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
653 if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
654 P = &I;
655 break;
656 }
657 }
658
659 // We found an instruction that may write to the loaded memory.
660 // We can try to promote at this position instead of the store
661 // position if nothing aliases the store memory after this and the store
662 // destination is not in the range.
663 if (P && P != SI) {
664 if (!moveUp(SI, P, LI))
665 P = nullptr;
666 }
667
668 // If a valid insertion position is found, then we can promote
669 // the load/store pair to a memcpy.
670 if (P) {
671 // If we load from memory that may alias the memory we store to,
672 // memmove must be used to preserve semantics. If not, memcpy can
673 // be used. Also, if we load from constant memory, memcpy can be used
674 // as the constant memory won't be modified.
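      // For illustration (hypothetical): for a "*q = *p" pair where %p and %q
      // may alias, a memmove is emitted; if %p is known to point to constant
      // memory, a memcpy is emitted instead.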
675 bool UseMemMove = false;
676 if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
677 UseMemMove = true;
678
679 uint64_t Size = DL.getTypeStoreSize(T);
680
681 IRBuilder<> Builder(P);
682 Instruction *M;
683 if (UseMemMove)
684 M = Builder.CreateMemMove(
685 SI->getPointerOperand(), SI->getAlign(),
686 LI->getPointerOperand(), LI->getAlign(), Size);
687 else
688 M = Builder.CreateMemCpy(
689 SI->getPointerOperand(), SI->getAlign(),
690 LI->getPointerOperand(), LI->getAlign(), Size);
691 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
692
693 LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
694 << *M << "\n");
695
696 auto *LastDef =
697 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
698 auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
699 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
700
701 eraseInstruction(SI);
702 eraseInstruction(LI);
703 ++NumMemCpyInstr;
704
705 // Make sure we do not invalidate the iterator.
706 BBI = M->getIterator();
707 return true;
708 }
709 }
710
711 // Detect cases where we're performing call slot forwarding, but
712 // happen to be using a load-store pair to implement it, rather than
713 // a memcpy.
714 BatchAAResults BAA(*AA);
715 auto GetCall = [&]() -> CallInst * {
716 // We defer this expensive clobber walk until the cheap checks
717 // have been done on the source inside performCallSlotOptzn.
718 if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
719 MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
720 return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
721 return nullptr;
722 };
723
724 bool Changed = performCallSlotOptzn(
725 LI, SI, SI->getPointerOperand()->stripPointerCasts(),
726 LI->getPointerOperand()->stripPointerCasts(),
727 DL.getTypeStoreSize(SI->getOperand(0)->getType()),
728 std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
729 if (Changed) {
730 eraseInstruction(SI);
731 eraseInstruction(LI);
732 ++NumMemCpyInstr;
733 return true;
734 }
735
736 return false;
737}
738
739bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
740 if (!SI->isSimple()) return false;
741
742 // Avoid merging nontemporal stores since the resulting
743 // memcpy/memset would not be able to preserve the nontemporal hint.
744 // In theory we could teach this pass to propagate the !nontemporal metadata to
745 // memset calls. However, that change would force the backend to
746 // conservatively expand !nontemporal memset calls back to sequences of
747 // store instructions (effectively undoing the merging).
748 if (SI->getMetadata(LLVMContext::MD_nontemporal))
749 return false;
750
751 const DataLayout &DL = SI->getModule()->getDataLayout();
752
753 Value *StoredVal = SI->getValueOperand();
754
755 // Not all the transforms below are correct for non-integral pointers, bail
756 // until we've audited the individual pieces.
757 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
758 return false;
759
760 // Load to store forwarding can be interpreted as memcpy.
761 if (auto *LI = dyn_cast<LoadInst>(StoredVal))
762 return processStoreOfLoad(SI, LI, DL, BBI);
763
764 // The following code creates memset intrinsics out of thin air. Don't do
765 // this if the corresponding libfunc is not available.
766 // TODO: We should really distinguish between libcall availability and
767 // our ability to introduce intrinsics.
768 if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
769 return false;
770
771 // There are two cases that are interesting for this code to handle: memcpy
772 // and memset. Right now we only handle memset.
773
774 // Ensure that the value being stored is something that can be memset a byte
775 // at a time, like "0" or "-1" of any width, as well as things like
776 // 0xA0A0A0A0 and 0.0.
777 auto *V = SI->getOperand(0);
778 if (Value *ByteVal = isBytewiseValue(V, DL)) {
779 if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
780 ByteVal)) {
781 BBI = I->getIterator(); // Don't invalidate iterator.
782 return true;
783 }
784
785 // If we have an aggregate, we try to promote it to memset regardless
786 // of opportunity for merging as it can expose optimization opportunities
787 // in subsequent passes.
788 auto *T = V->getType();
789 if (T->isAggregateType()) {
790 uint64_t Size = DL.getTypeStoreSize(T);
791 IRBuilder<> Builder(SI);
792 auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
793 SI->getAlign());
794 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
795
796 LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
797
798 // The newly inserted memset is immediately overwritten by the original
799 // store, so we do not need to rename uses.
800 auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
801 auto *NewAccess = MSSAU->createMemoryAccessBefore(
802 M, StoreDef->getDefiningAccess(), StoreDef);
803 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
804
805 eraseInstruction(SI);
806 NumMemSetInfer++;
807
808 // Make sure we do not invalidate the iterator.
809 BBI = M->getIterator();
810 return true;
811 }
812 }
813
814 return false;
815}
816
817bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
818 // See if there is another memset or store neighboring this memset which
819 // allows us to widen out the memset to do a single larger store.
820 if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
821 if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
822 MSI->getValue())) {
823 BBI = I->getIterator(); // Don't invalidate iterator.
824 return true;
825 }
826 return false;
827}
828
829/// Takes a memcpy and a call that it depends on,
830/// and checks for the possibility of a call slot optimization by having
831/// the call write its result directly into the destination of the memcpy.
832bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
833 Instruction *cpyStore, Value *cpyDest,
834 Value *cpySrc, TypeSize cpySize,
835 Align cpyDestAlign, BatchAAResults &BAA,
836 std::function<CallInst *()> GetC) {
837 // The general transformation to keep in mind is
838 //
839 // call @func(..., src, ...)
840 // memcpy(dest, src, ...)
841 //
842 // ->
843 //
844 // memcpy(dest, src, ...)
845 // call @func(..., dest, ...)
846 //
847 // Since moving the memcpy is technically awkward, we additionally check that
848 // src only holds uninitialized values at the moment of the call, meaning that
849 // the memcpy can be discarded rather than moved.
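  // For illustration (hypothetical IR), with %src a fresh alloca that the
  // callee fills in and whose contents are then copied to %dest:
  //   call void @func(ptr %src)
  //   call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 N, i1 false)
  // can, when every check below succeeds, become
  //   call void @func(ptr %dest)
  // with the memcpy dropped entirely.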
850
851 // We can't optimize scalable types.
852 if (cpySize.isScalable())
853 return false;
854
855 // Require that src be an alloca. This simplifies the reasoning considerably.
856 auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
857 if (!srcAlloca)
858 return false;
859
860 ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
861 if (!srcArraySize)
862 return false;
863
864 const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
865 uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
866 srcArraySize->getZExtValue();
867
868 if (cpySize < srcSize)
869 return false;
870
871 CallInst *C = GetC();
872 if (!C)
873 return false;
874
875 // Lifetime marks shouldn't be operated on.
876 if (Function *F = C->getCalledFunction())
877 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
878 return false;
879
880
881 if (C->getParent() != cpyStore->getParent()) {
882 LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
883 return false;
884 }
885
886 MemoryLocation DestLoc = isa<StoreInst>(cpyStore) ?
887 MemoryLocation::get(cpyStore) :
888 MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
889
890 // Check that nothing touches the dest of the copy between
891 // the call and the store/memcpy.
892 Instruction *SkippedLifetimeStart = nullptr;
893 if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
894 MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
895 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
896 return false;
897 }
898
899 // If we need to move a lifetime.start above the call, make sure that we can
900 // actually do so. If the argument is bitcasted for example, we would have to
901 // move the bitcast as well, which we don't handle.
902 if (SkippedLifetimeStart) {
903 auto *LifetimeArg =
904 dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
905 if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
906 C->comesBefore(LifetimeArg))
907 return false;
908 }
909
910 // Check that accessing the first srcSize bytes of dest will not cause a
911 // trap. Otherwise the transform is invalid since it might cause a trap
912 // to occur earlier than it otherwise would.
913 if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
914 DL, C, AC, DT)) {
915 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
916 return false;
917 }
918
919 // Make sure that nothing can observe cpyDest being written early. There are
920 // a number of cases to consider:
921 // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
922 // the transform.
923 // 2. C itself may not access cpyDest (prior to the transform). This is
924 // checked further below.
925 // 3. If cpyDest is accessible to the caller of this function (potentially
926 // captured and not based on an alloca), we need to ensure that we cannot
927 // unwind between C and cpyStore. This is checked here.
928 // 4. If cpyDest is potentially captured, there may be accesses to it from
929 // another thread. In this case, we need to check that cpyStore is
930 // guaranteed to be executed if C is. As it is a non-atomic access, it
931 // renders accesses from other threads undefined.
932 // TODO: This is currently not checked.
933 if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
934 LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
935 return false;
936 }
937
938 // Check that dest points to memory that is at least as aligned as src.
939 Align srcAlign = srcAlloca->getAlign();
940 bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
941 // If dest is not aligned enough and we can't increase its alignment then
942 // bail out.
943 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
944 LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
945 return false;
946 }
947
948 // Check that src is not accessed except via the call and the memcpy. This
949 // guarantees that it holds only undefined values when passed in (so the final
950 // memcpy can be dropped), that it is not read or written between the call and
951 // the memcpy, and that writing beyond the end of it is undefined.
952 SmallVector<User *, 8> srcUseList(srcAlloca->users());
953 while (!srcUseList.empty()) {
954 User *U = srcUseList.pop_back_val();
955
956 if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
957 append_range(srcUseList, U->users());
958 continue;
959 }
960 if (const auto *G = dyn_cast<GetElementPtrInst>(U)) {
961 if (!G->hasAllZeroIndices())
962 return false;
963
964 append_range(srcUseList, U->users());
965 continue;
966 }
967 if (const auto *IT = dyn_cast<IntrinsicInst>(U))
968 if (IT->isLifetimeStartOrEnd())
969 continue;
970
971 if (U != C && U != cpyLoad)
972 return false;
973 }
974
975 // Check whether src is captured by the called function, in which case there
976 // may be further indirect uses of src.
977 bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
978 return U->stripPointerCasts() == cpySrc &&
979 !C->doesNotCapture(C->getArgOperandNo(&U));
980 });
981
982 // If src is captured, then check whether there are any potential uses of
983 // src through the captured pointer before the lifetime of src ends, either
984 // due to a lifetime.end or a return from the function.
985 if (SrcIsCaptured) {
986 // Check that dest is not captured before/at the call. We have already
987 // checked that src is not captured before it. If either had been captured,
988 // then the call might be comparing the argument against the captured dest
989 // or src pointer.
990 Value *DestObj = getUnderlyingObject(cpyDest);
991 if (!isIdentifiedFunctionLocal(DestObj) ||
992 PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
993 /* StoreCaptures */ true, C, DT,
994 /* IncludeI */ true))
995 return false;
996
997 MemoryLocation SrcLoc =
998 MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
999 for (Instruction &I :
1000 make_range(++C->getIterator(), C->getParent()->end())) {
1001 // Lifetime of srcAlloca ends at lifetime.end.
1002 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1003 if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
1004 II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
1005 cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
1006 break;
1007 }
1008
1009 // Lifetime of srcAlloca ends at return.
1010 if (isa<ReturnInst>(&I))
1011 break;
1012
1013 // Ignore the direct read of src in the load.
1014 if (&I == cpyLoad)
1015 continue;
1016
1017 // Check whether this instruction may mod/ref src through the captured
1018 // pointer (we have already handled any direct mod/refs in the loop above).
1019 // Also bail if we hit a terminator, as we don't want to scan into other
1020 // blocks.
1021 if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
1022 return false;
1023 }
1024 }
1025
1026 // Since we're changing the parameter to the callsite, we need to make sure
1027 // that what would be the new parameter dominates the callsite.
1028 if (!DT->dominates(cpyDest, C)) {
1029 // Support moving a constant index GEP before the call.
1030 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1031 if (GEP && GEP->hasAllConstantIndices() &&
1032 DT->dominates(GEP->getPointerOperand(), C))
1033 GEP->moveBefore(C);
1034 else
1035 return false;
1036 }
1037
1038 // In addition to knowing that the call does not access src in some
1039 // unexpected manner, for example via a global, which we deduce from
1040 // the use analysis, we also need to know that it does not sneakily
1041 // access dest. We rely on AA to figure this out for us.
1042 MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
1043 ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
1044 // If necessary, perform additional analysis.
1045 if (isModOrRefSet(MR))
1046 MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
1047 if (isModOrRefSet(MR))
1048 return false;
1049
1050 // We can't create address space casts here because we don't know if they're
1051 // safe for the target.
1052 if (cpySrc->getType()->getPointerAddressSpace() !=
1053 cpyDest->getType()->getPointerAddressSpace())
1054 return false;
1055 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1056 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1057 cpySrc->getType()->getPointerAddressSpace() !=
1058 C->getArgOperand(ArgI)->getType()->getPointerAddressSpace())
1059 return false;
1060
1061 // All the checks have passed, so do the transformation.
1062 bool changedArgument = false;
1063 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1064 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1065 Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
1066 : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
1067 cpyDest->getName(), C);
1068 changedArgument = true;
1069 if (C->getArgOperand(ArgI)->getType() == Dest->getType())
1070 C->setArgOperand(ArgI, Dest);
1071 else
1072 C->setArgOperand(ArgI, CastInst::CreatePointerCast(
1073 Dest, C->getArgOperand(ArgI)->getType(),
1074 Dest->getName(), C));
1075 }
1076
1077 if (!changedArgument)
1078 return false;
1079
1080 // If the destination wasn't sufficiently aligned then increase its alignment.
1081 if (!isDestSufficientlyAligned) {
1082 assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1083 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1084 }
1085
1086 if (SkippedLifetimeStart) {
1087 SkippedLifetimeStart->moveBefore(C);
1088 MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
1089 MSSA->getMemoryAccess(C));
1090 }
1091
1092 // Update AA metadata
1093 // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
1094 // handled here, but combineMetadata doesn't support them yet
1095 unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1096 LLVMContext::MD_noalias,
1097 LLVMContext::MD_invariant_group,
1098 LLVMContext::MD_access_group};
1099 combineMetadata(C, cpyLoad, KnownIDs, true);
1100 if (cpyLoad != cpyStore)
1101 combineMetadata(C, cpyStore, KnownIDs, true);
1102
1103 ++NumCallSlot;
1104 return true;
1105}
1106
1107/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1108/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
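///
/// For illustration (hypothetical IR):
///   call void @llvm.memcpy.p0.p0.i64(ptr %b, ptr %a, i64 32, i1 false)
///   call void @llvm.memcpy.p0.p0.i64(ptr %c, ptr %b, i64 32, i1 false)
/// can, when %a is not written in between, have the second copy read from %a
/// directly (or become a memmove if %c may alias %a).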
1109bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1110 MemCpyInst *MDep,
1111 BatchAAResults &BAA) {
1112 // We can only transform memcpy's where the dest of one is the source of the
1113 // other.
1114 if (M->getSource() != MDep->getDest() || MDep->isVolatile())
1115 return false;
1116
1117 // If dep instruction is reading from our current input, then it is a noop
1118 // transfer and substituting the input won't change this instruction. Just
1119 // ignore the input and let someone else zap MDep. This handles cases like:
1120 // memcpy(a <- a)
1121 // memcpy(b <- a)
1122 if (M->getSource() == MDep->getSource())
1123 return false;
1124
1125 // Second, the length of the memcpy's must be the same, or the preceding one
1126 // must be larger than the following one.
1127 if (MDep->getLength() != M->getLength()) {
1128 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1129 auto *MLen = dyn_cast<ConstantInt>(M->getLength());
1130 if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
1131 return false;
1132 }
1133
1134 // Verify that the copied-from memory doesn't change in between the two
1135 // transfers. For example, in:
1136 // memcpy(a <- b)
1137 // *b = 42;
1138 // memcpy(c <- a)
1139 // It would be invalid to transform the second memcpy into memcpy(c <- b).
1140 //
1141 // TODO: If the code between M and MDep is transparent to the destination "c",
1142 // then we could still perform the xform by moving M up to the first memcpy.
1143 // TODO: It would be sufficient to check the MDep source up to the memcpy
1144 // size of M, rather than MDep.
1145 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1146 MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
1147 return false;
1148
1149 // If the dest of the second might alias the source of the first, then the
1150 // source and dest might overlap. In addition, if the source of the first
1151 // points to constant memory, they won't overlap by definition. Otherwise, we
1152 // still want to eliminate the intermediate value, but we have to generate a
1153 // memmove instead of memcpy.
1154 bool UseMemMove = false;
1155 if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep))))
1156 UseMemMove = true;
1157
1158 // If all checks passed, then we can transform M.
1159 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1160 << *MDep << '\n' << *M << '\n');
1161
1162 // TODO: Is this worth it if we're creating a less aligned memcpy? For
1163 // example we could be moving from movaps -> movq on x86.
1164 IRBuilder<> Builder(M);
1165 Instruction *NewM;
1166 if (UseMemMove)
1167 NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
1168 MDep->getRawSource(), MDep->getSourceAlign(),
1169 M->getLength(), M->isVolatile());
1170 else if (isa<MemCpyInlineInst>(M)) {
1171 // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1172 // never allowed since that would allow the latter to be lowered as a call
1173 // to an external function.
1174 NewM = Builder.CreateMemCpyInline(
1175 M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
1176 MDep->getSourceAlign(), M->getLength(), M->isVolatile());
1177 } else
1178 NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
1179 MDep->getRawSource(), MDep->getSourceAlign(),
1180 M->getLength(), M->isVolatile());
1181 NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
1182
1183 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
1184 auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1185 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1186 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1187
1188 // Remove the instruction we're replacing.
1189 eraseInstruction(M);
1190 ++NumMemCpyInstr;
1191 return true;
1192}
1193
1194/// We've found that the (upward scanning) memory dependence of \p MemCpy is
1195/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1196/// weren't copied over by \p MemCpy.
1197///
1198/// In other words, transform:
1199/// \code
1200/// memset(dst, c, dst_size);
1201/// memcpy(dst, src, src_size);
1202/// \endcode
1203/// into:
1204/// \code
1205/// memcpy(dst, src, src_size);
1206/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1207/// \endcode
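///
/// For illustration (hypothetical sizes): memset(dst, c, 100) followed by
/// memcpy(dst, src, 60) becomes memcpy(dst, src, 60) followed by
/// memset(dst + 60, c, 40).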
1208bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1209 MemSetInst *MemSet,
1210 BatchAAResults &BAA) {
1211 // We can only transform memset/memcpy with the same destination.
1212 if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1213 return false;
1214
1215 // Check that src and dst of the memcpy aren't the same. While memcpy
1216 // operands cannot partially overlap, exact equality is allowed.
1217 if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1218 return false;
1219
1220 // We know that dst up to src_size is not written. We now need to make sure
1221 // that dst up to dst_size is not accessed. (If we did not move the memset,
1222 // checking for reads would be sufficient.)
1223 if (accessedBetween(BAA, MemoryLocation::getForDest(MemSet),
1224 MSSA->getMemoryAccess(MemSet),
1225 MSSA->getMemoryAccess(MemCpy)))
1226 return false;
1227
1228 // Use the same i8* dest as the memcpy, killing the memset dest if different.
1229 Value *Dest = MemCpy->getRawDest();
1230 Value *DestSize = MemSet->getLength();
1231 Value *SrcSize = MemCpy->getLength();
1232
1233 if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1234 return false;
1235
1236 // If the sizes are the same, simply drop the memset instead of generating
1237 // a replacement with zero size.
1238 if (DestSize == SrcSize) {
1239 eraseInstruction(MemSet);
1240 return true;
1241 }
1242
1243 // By default, create an unaligned memset.
1244 Align Alignment = Align(1);
1245 // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1246 // of the sum.
1247 const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
1248 MemCpy->getDestAlign().valueOrOne());
1249 if (DestAlign > 1)
1250 if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1251 Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
1252
1253 IRBuilder<> Builder(MemCpy);
1254
1255 // If the sizes have different types, zext the smaller one.
1256 if (DestSize->getType() != SrcSize->getType()) {
1257 if (DestSize->getType()->getIntegerBitWidth() >
1258 SrcSize->getType()->getIntegerBitWidth())
1259 SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1260 else
1261 DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1262 }
1263
1264 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1265 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1266 Value *MemsetLen = Builder.CreateSelect(
1267 Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1268 unsigned DestAS = Dest->getType()->getPointerAddressSpace();
1269 Instruction *NewMemSet = Builder.CreateMemSet(
1270 Builder.CreateGEP(
1271 Builder.getInt8Ty(),
1272 Builder.CreatePointerCast(Dest, Builder.getInt8PtrTy(DestAS)),
1273 SrcSize),
1274 MemSet->getOperand(1), MemsetLen, Alignment);
1275
1276 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
1277 "MemCpy must be a MemoryDef");
1278 // The new memset is inserted after the memcpy, but it is known that its
1279 // defining access is the memset about to be removed which immediately
1280 // precedes the memcpy.
1281 auto *LastDef =
1282 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1283 auto *NewAccess = MSSAU->createMemoryAccessBefore(
1284 NewMemSet, LastDef->getDefiningAccess(), LastDef);
1285 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1286
1287 eraseInstruction(MemSet);
1288 return true;
1289}
1290
1291/// Determine whether the instruction has undefined content for the given Size,
1292/// either because it was freshly alloca'd or started its lifetime.
1293static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1294 MemoryDef *Def, Value *Size) {
1295 if (MSSA->isLiveOnEntryDef(Def))
1296 return isa<AllocaInst>(getUnderlyingObject(V));
1297
1298 if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1299 if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1300 auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1301
1302 if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1303 if (AA.isMustAlias(V, II->getArgOperand(1)) &&
1304 LTSize->getZExtValue() >= CSize->getZExtValue())
1305 return true;
1306 }
1307
1308 // If the lifetime.start covers a whole alloca (as it almost always
1309 // does) and we're querying a pointer based on that alloca, then we know
1310 // the memory is definitely undef, regardless of how exactly we alias.
1311 // The size also doesn't matter, as an out-of-bounds access would be UB.
1312 if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
1313 if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1314 const DataLayout &DL = Alloca->getModule()->getDataLayout();
1315 if (std::optional<TypeSize> AllocaSize =
1316 Alloca->getAllocationSize(DL))
1317 if (*AllocaSize == LTSize->getValue())
1318 return true;
1319 }
1320 }
1321 }
1322 }
1323
1324 return false;
1325}
1326
1327/// Transform memcpy to memset when its source was just memset.
1328/// In other words, turn:
1329/// \code
1330/// memset(dst1, c, dst1_size);
1331/// memcpy(dst2, dst1, dst2_size);
1332/// \endcode
1333/// into:
1334/// \code
1335/// memset(dst1, c, dst1_size);
1336/// memset(dst2, c, dst2_size);
1337/// \endcode
1338/// When dst2_size <= dst1_size.
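///
/// For illustration (hypothetical sizes): after memset(dst1, c, 128), a
/// memcpy(dst2, dst1, 64) only ever reads bytes equal to c, so it can be
/// rewritten as memset(dst2, c, 64).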
1339bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1340 MemSetInst *MemSet,
1341 BatchAAResults &BAA) {
1342 // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
1343 // memcpying from the same address. Otherwise it is hard to reason about.
1344 if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1345 return false;
1346
1347 Value *MemSetSize = MemSet->getLength();
1348 Value *CopySize = MemCpy->getLength();
1349
1350 if (MemSetSize != CopySize) {
1351 // Make sure the memcpy doesn't read any more than what the memset wrote.
1352 // Don't worry about sizes larger than i64.
1353
1354 // A known memset size is required.
1355 auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1356 if (!CMemSetSize)
1357 return false;
1358
1359 // A known memcpy size is also required.
1360 auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1361 if (!CCopySize)
1362 return false;
1363 if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1364 // If the memcpy is larger than the memset, but the memory was undef prior
1365 // to the memset, we can just ignore the tail. Technically we're only
1366 // interested in the bytes from MemSetSize..CopySize here, but as we can't
1367 // easily represent this location, we use the full 0..CopySize range.
1368 MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1369 bool CanReduceSize = false;
1370 MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1371 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1372 MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
1373 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1374 if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
1375 CanReduceSize = true;
1376
1377 if (!CanReduceSize)
1378 return false;
1379 CopySize = MemSetSize;
1380 }
1381 }
1382
1383 IRBuilder<> Builder(MemCpy);
1384 Instruction *NewM =
1385 Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1386 CopySize, MemCpy->getDestAlign());
1387 auto *LastDef =
1388 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1389 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1390 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1391
1392 return true;
1393}
1394
1395/// Perform simplification of memcpy's. If we have memcpy A
1396/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1397/// B to be a memcpy from X to Z (or potentially a memmove, depending on
1398/// circumstances). This allows later passes to remove the first memcpy
1399/// altogether.
1400bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1401 // We can only optimize non-volatile memcpy's.
1402 if (M->isVolatile()) return false;
1403
1404 // If the source and destination of the memcpy are the same, then zap it.
1405 if (M->getSource() == M->getDest()) {
1406 ++BBI;
1407 eraseInstruction(M);
1408 return true;
1409 }
1410
1411 // If copying from a constant, try to turn the memcpy into a memset.
1412 if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
1413 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1414 if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1415 M->getModule()->getDataLayout())) {
1416 IRBuilder<> Builder(M);
1417 Instruction *NewM = Builder.CreateMemSet(
1418 M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
1419 auto *LastDef =
1420 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1421 auto *NewAccess =
1422 MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1423 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1424
1425 eraseInstruction(M);
1426 ++NumCpyToSet;
1427 return true;
1428 }
1429
1430 BatchAAResults BAA(*AA);
1431 MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1432 // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
1433 MemoryAccess *AnyClobber = MA->getDefiningAccess();
1434 MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1435 const MemoryAccess *DestClobber =
1436 MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
1437
1438 // Try to turn a partially redundant memset + memcpy into
1439 // memcpy + smaller memset. We don't need the memcpy size for this.
1440 // The memcpy must post-dominate the memset, so limit this to the same basic
1441 // block. A non-local generalization is likely not worthwhile.
1442 if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1443 if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1444 if (DestClobber->getBlock() == M->getParent())
1445 if (processMemSetMemCpyDependence(M, MDep, BAA))
1446 return true;
1447
1448 MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1449 AnyClobber, MemoryLocation::getForSource(M), BAA);
1450
1451 // There are four possible optimizations we can do for memcpy:
1452 // a) memcpy-memcpy xform which exposes redundance for DSE.
1453 // b) call-memcpy xform for return slot optimization.
1454 // c) memcpy from freshly alloca'd space or space that has just started
1455 // its lifetime copies undefined data, and we can therefore eliminate
1456 // the memcpy in favor of the data that was already at the destination.
1457 // d) memcpy from a just-memset'd source can be turned into memset.
1458 if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1459 if (Instruction *MI = MD->getMemoryInst()) {
1460 if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1461 if (auto *C = dyn_cast<CallInst>(MI)) {
1462 if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1463 TypeSize::getFixed(CopySize->getZExtValue()),
1464 M->getDestAlign().valueOrOne(), BAA,
1465 [C]() -> CallInst * { return C; })) {
1466 LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1467 << " call: " << *C << "\n"
1468 << " memcpy: " << *M << "\n");
1469 eraseInstruction(M);
1470 ++NumMemCpyInstr;
1471 return true;
1472 }
1473 }
1474 }
1475 if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1476 return processMemCpyMemCpyDependence(M, MDep, BAA);
1477 if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1478 if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
1479 LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1480 eraseInstruction(M);
1481 ++NumCpyToSet;
1482 return true;
1483 }
1484 }
1485 }
1486
1487 if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
1488 LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1489 eraseInstruction(M);
1490 ++NumMemCpyInstr;
1491 return true;
1492 }
1493 }
1494
1495 return false;
1496}
1497
1498/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1499/// not to alias.
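///
/// For illustration (hypothetical IR): a memmove whose source and destination
/// are two distinct allocas cannot overlap, so the call can be rewritten in
/// place to the llvm.memcpy intrinsic with the same arguments.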
1500bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1501 // See if the source could be modified by this memmove potentially.
1502 if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
1503 return false;
1504
1505 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1506 << "\n");
1507
1508 // If not, then we know we can transform this.
1509 Type *ArgTys[3] = { M->getRawDest()->getType(),
1510 M->getRawSource()->getType(),
1511 M->getLength()->getType() };
1512 M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
1513 Intrinsic::memcpy, ArgTys));
1514
1515 // For MemorySSA nothing really changes (except that memcpy may imply stricter
1516 // aliasing guarantees).
1517
1518 ++NumMoveToCpy;
1519 return true;
1520}
1521
1522/// This is called on every byval argument in call sites.
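///
/// Editor's sketch of the rewrite (names and types are hypothetical):
///   memcpy(%tmp <- %src, <size of the byval type>)
///   call void @foo(ptr byval(%struct.T) align 8 %tmp)
/// When %src is not written between the copy and the call, and the size,
/// alignment and address-space checks below succeed, the call can take the
/// copy's source directly:
///   call void @foo(ptr byval(%struct.T) align 8 %src)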
1523bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1524 const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1525 // Find out what feeds this byval argument.
1526 Value *ByValArg = CB.getArgOperand(ArgNo);
1527 Type *ByValTy = CB.getParamByValType(ArgNo);
1528 TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1529 MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1530 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1531 if (!CallAccess)
1532 return false;
1533 MemCpyInst *MDep = nullptr;
1534 BatchAAResults BAA(*AA);
1535  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1536      CallAccess->getDefiningAccess(), Loc, BAA);
1537 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1538 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1539
1540 // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1541 // a memcpy, see if we can byval from the source of the memcpy instead of the
1542 // result.
1543 if (!MDep || MDep->isVolatile() ||
1544 ByValArg->stripPointerCasts() != MDep->getDest())
1545 return false;
1546
1547  // The length of the memcpy must be at least the size of the byval.
1548 auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1549 if (!C1 || !TypeSize::isKnownGE(
1550 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1551 return false;
1552
1553 // Get the alignment of the byval. If the call doesn't specify the alignment,
1554  // then it is some target-specific value that we can't know.
1555 MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1556 if (!ByValAlign) return false;
1557
1558 // If it is greater than the memcpy, then we check to see if we can force the
1559 // source of the memcpy to the alignment we need. If we fail, we bail out.
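  // Editor's worked example with hypothetical numbers: if the byval requires
  // align 16 but the memcpy source is only known to be align 8,
  // getOrEnforceKnownAlignment may still prove (or force, e.g. on an alloca)
  // align 16; if it cannot, the transformation is abandoned.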
1560 MaybeAlign MemDepAlign = MDep->getSourceAlign();
1561 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1562 getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1563 DT) < *ByValAlign)
1564 return false;
1565
1566 // The address space of the memcpy source must match the byval argument
1567 if (MDep->getSource()->getType()->getPointerAddressSpace() !=
1568 ByValArg->getType()->getPointerAddressSpace())
1569 return false;
1570
1571  // Verify that the copied-from memory doesn't change between the memcpy and
1572 // the byval call.
1573 // memcpy(a <- b)
1574 // *b = 42;
1575 // foo(*a)
1576  // It would be invalid to transform the call into foo(*b).
1577 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1578 MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
1579 return false;
1580
1581 Value *TmpCast = MDep->getSource();
1582 if (MDep->getSource()->getType() != ByValArg->getType()) {
1583 BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
1584 "tmpcast", &CB);
1585 // Set the tmpcast's DebugLoc to MDep's
1586 TmpBitCast->setDebugLoc(MDep->getDebugLoc());
1587 TmpCast = TmpBitCast;
1588 }
1589
1590 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1591 << " " << *MDep << "\n"
1592 << " " << CB << "\n");
1593
1594 // Otherwise we're good! Update the byval argument.
1595 CB.setArgOperand(ArgNo, TmpCast);
1596 ++NumMemCpyInstr;
1597 return true;
1598}
1599
1600/// Executes one iteration of MemCpyOptPass.
1601bool MemCpyOptPass::iterateOnFunction(Function &F) {
1602 bool MadeChange = false;
1603
1604  // Walk all instructions in the function.
1605 for (BasicBlock &BB : F) {
1606    // Skip unreachable blocks. For example, processStore assumes that an
1607 // instruction in a BB can't be dominated by a later instruction in the
1608 // same BB (which is a scenario that can happen for an unreachable BB that
1609 // has itself as a predecessor).
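    // Editor's illustration, not from the upstream source: in an unreachable
    // self-looping block such as
    //   bb:                    ; preds = %bb
    //     %x = add i32 %y, 1   ; uses %y before its definition below
    //     %y = add i32 %x, 1
    //     br label %bb
    // a value can be used before it is defined, so such blocks are skipped.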
1610 if (!DT->isReachableFromEntry(&BB))
1611 continue;
1612
1613 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
1614 // Avoid invalidating the iterator.
1615 Instruction *I = &*BI++;
1616
1617 bool RepeatInstruction = false;
1618
1619 if (auto *SI = dyn_cast<StoreInst>(I))
1620 MadeChange |= processStore(SI, BI);
1621 else if (auto *M = dyn_cast<MemSetInst>(I))
1622 RepeatInstruction = processMemSet(M, BI);
1623 else if (auto *M = dyn_cast<MemCpyInst>(I))
1624 RepeatInstruction = processMemCpy(M, BI);
1625 else if (auto *M = dyn_cast<MemMoveInst>(I))
1626 RepeatInstruction = processMemMove(M);
1627 else if (auto *CB = dyn_cast<CallBase>(I)) {
1628 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
1629 if (CB->isByValArgument(i))
1630 MadeChange |= processByValArgument(*CB, i);
1631 }
1632
1633 // Reprocess the instruction if desired.
1634 if (RepeatInstruction) {
1635 if (BI != BB.begin())
1636 --BI;
1637 MadeChange = true;
1638 }
1639 }
1640 }
1641
1642 return MadeChange;
1643}
1644
1645 PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
1646   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1647 auto *AA = &AM.getResult<AAManager>(F);
1648 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
1649 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1650 auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
1651
1652 bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
1653 if (!MadeChange)
1654 return PreservedAnalyses::all();
1655
1655
1656   PreservedAnalyses PA;
1657   PA.preserveSet<CFGAnalyses>();
1658   PA.preserve<MemorySSAAnalysis>();
1659   return PA;
1660}
1661
1662 bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
1663                             AliasAnalysis *AA_, AssumptionCache *AC_,
1664 DominatorTree *DT_, MemorySSA *MSSA_) {
1665 bool MadeChange = false;
1666 TLI = TLI_;
1667 AA = AA_;
1668 AC = AC_;
1669 DT = DT_;
1670 MSSA = MSSA_;
1671 MemorySSAUpdater MSSAU_(MSSA_);
1672 MSSAU = &MSSAU_;
1673
1674 while (true) {
1675 if (!iterateOnFunction(F))
1676 break;
1677 MadeChange = true;
1678 }
1679
1680 if (VerifyMemorySSA)
1681 MSSA_->verifyMemorySSA();
1682
1683 return MadeChange;
1684}