MemCpyOptimizer.cpp
1 //===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs various transformations related to eliminating memcpy
10 // calls, or transforming sets of stores into memset's.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
15 #include "llvm/ADT/DenseSet.h"
16 #include "llvm/ADT/None.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
25 #include "llvm/Analysis/Loads.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DataLayout.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/Dominators.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/GlobalVariable.h"
38 #include "llvm/IR/IRBuilder.h"
39 #include "llvm/IR/InstrTypes.h"
40 #include "llvm/IR/Instruction.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/Intrinsics.h"
44 #include "llvm/IR/LLVMContext.h"
45 #include "llvm/IR/Module.h"
46 #include "llvm/IR/PassManager.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/User.h"
49 #include "llvm/IR/Value.h"
50 #include "llvm/InitializePasses.h"
51 #include "llvm/Pass.h"
52 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/Debug.h"
56 #include "llvm/Transforms/Scalar.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstdint>
61 
62 using namespace llvm;
63 
64 #define DEBUG_TYPE "memcpyopt"
65 
66 static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
67     "enable-memcpyopt-without-libcalls", cl::Hidden,
68     cl::desc("Enable memcpyopt even when libcalls are disabled"));
69 
70 STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
71 STATISTIC(NumMemSetInfer, "Number of memsets inferred");
72 STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
73 STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
74 STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
75 
76 namespace {
77 
78 /// Represents a range of memset'd bytes with the ByteVal value.
79 /// This allows us to analyze stores like:
80 /// store 0 -> P+1
81 /// store 0 -> P+0
82 /// store 0 -> P+3
83 /// store 0 -> P+2
84 /// which sometimes happens with stores to arrays of structs etc. When we see
85 /// the first store, we make a range [1, 2). The second store extends the range
86 /// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
87 /// two ranges into [0, 3) which is memset'able.
88 struct MemsetRange {
89  // Start/End - A semi range that describes the span that this range covers.
90  // The range is closed at the start and open at the end: [Start, End).
91  int64_t Start, End;
92 
93  /// StartPtr - The getelementptr instruction that points to the start of the
94  /// range.
95  Value *StartPtr;
96 
97  /// Alignment - The known alignment of the first store.
98  MaybeAlign Alignment;
99 
100  /// TheStores - The actual stores that make up this range.
101  SmallVector<Instruction *, 16> TheStores;
102 
103  bool isProfitableToUseMemset(const DataLayout &DL) const;
104 };
105 
106 } // end anonymous namespace
107 
108 bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
109  // If we found at least 4 stores to merge, or at least 16 bytes, use memset.
110  if (TheStores.size() >= 4 || End-Start >= 16) return true;
111 
112  // If there is nothing to merge, don't do anything.
113  if (TheStores.size() < 2) return false;
114 
115  // If any of the stores are a memset, then it is always good to extend the
116  // memset.
117  for (Instruction *SI : TheStores)
118  if (!isa<StoreInst>(SI))
119  return true;
120 
121  // Assume that the code generator is capable of merging pairs of stores
122  // together if it wants to.
123  if (TheStores.size() == 2) return false;
124 
125  // If we have fewer than 8 stores, it can still be worthwhile to do this.
126  // For example, merging 4 i8 stores into an i32 store is useful almost always.
127  // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
128  // memset will be split into 2 32-bit stores anyway) and doing so can
129  // pessimize the llvm optimizer.
130  //
131  // Since we don't have perfect knowledge here, make some assumptions: assume
132  // the maximum GPR width is the same size as the largest legal integer
133  // size. If so, check to see whether we will end up actually reducing the
134  // number of stores used.
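 // For illustration only (a hypothetical case, not part of the original
 // comments): three stores covering 8 contiguous bytes on a target whose
 // largest legal integer is 64 bits give Bytes = 8 and MaxIntSize = 8, so
 // NumPointerStores = 1 and NumByteStores = 0; since 3 > 1, the memset is
 // formed. Three i8 stores covering only 3 bytes give 0 + 3, and 3 > 3
 // fails, so the stores are left alone.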
135  unsigned Bytes = unsigned(End-Start);
136  unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
137  if (MaxIntSize == 0)
138  MaxIntSize = 1;
139  unsigned NumPointerStores = Bytes / MaxIntSize;
140 
141  // Assume the remaining bytes, if any, are done a byte at a time.
142  unsigned NumByteStores = Bytes % MaxIntSize;
143 
144  // If we will reduce the # stores (according to this heuristic), do the
145  // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
146  // etc.
147  return TheStores.size() > NumPointerStores+NumByteStores;
148 }
149 
150 namespace {
151 
152 class MemsetRanges {
153  using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
154 
155  /// A sorted list of the memset ranges.
156  SmallVector<MemsetRange, 8> Ranges;
157 
158  const DataLayout &DL;
159 
160 public:
161  MemsetRanges(const DataLayout &DL) : DL(DL) {}
162 
163  using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
164 
165  const_iterator begin() const { return Ranges.begin(); }
166  const_iterator end() const { return Ranges.end(); }
167  bool empty() const { return Ranges.empty(); }
168 
169  void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
170  if (auto *SI = dyn_cast<StoreInst>(Inst))
171  addStore(OffsetFromFirst, SI);
172  else
173  addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
174  }
175 
176  void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
177  TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
178  assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
179  addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(),
180  SI->getAlign(), SI);
181  }
182 
183  void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
184  int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
185  addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
186  }
187 
188  void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
189  Instruction *Inst);
190 };
191 
192 } // end anonymous namespace
193 
194 /// Add a new store to the MemsetRanges data structure. This adds a
195 /// new range for the specified store at the specified offset, merging into
196 /// existing ranges as appropriate.
197 void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
198  MaybeAlign Alignment, Instruction *Inst) {
199  int64_t End = Start+Size;
200 
201  range_iterator I = partition_point(
202  Ranges, [=](const MemsetRange &O) { return O.End < Start; });
203 
204  // We now know that I == E, in which case we didn't find anything to merge
205  // with, or that Start <= I->End. If End < I->Start or I == E, then we need
206  // to insert a new range. Handle this now.
207  if (I == Ranges.end() || End < I->Start) {
208  MemsetRange &R = *Ranges.insert(I, MemsetRange());
209  R.Start = Start;
210  R.End = End;
211  R.StartPtr = Ptr;
212  R.Alignment = Alignment;
213  R.TheStores.push_back(Inst);
214  return;
215  }
216 
217  // This store overlaps with I, add it.
218  I->TheStores.push_back(Inst);
219 
220  // At this point, we may have an interval that completely contains our store.
221  // If so, just add it to the interval and return.
222  if (I->Start <= Start && I->End >= End)
223  return;
224 
225  // Now we know that Start <= I->End and End >= I->Start so the range overlaps
226  // but is not entirely contained within the range.
227 
228  // See if this store extends the start of the range. In this case, it couldn't
229  // possibly cause it to join the prior range, because otherwise we would have
230  // stopped on *it*.
231  if (Start < I->Start) {
232  I->Start = Start;
233  I->StartPtr = Ptr;
234  I->Alignment = Alignment;
235  }
236 
237  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
238  // is in or right at the end of I), and that End >= I->Start. Extend I out to
239  // End.
240  if (End > I->End) {
241  I->End = End;
242  range_iterator NextI = I;
243  while (++NextI != Ranges.end() && End >= NextI->Start) {
244  // Merge the range in.
245  I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
246  if (NextI->End > I->End)
247  I->End = NextI->End;
248  Ranges.erase(NextI);
249  NextI = I;
250  }
251  }
252 }
253 
254 //===----------------------------------------------------------------------===//
255 // MemCpyOptLegacyPass Pass
256 //===----------------------------------------------------------------------===//
257 
258 namespace {
259 
260 class MemCpyOptLegacyPass : public FunctionPass {
261  MemCpyOptPass Impl;
262 
263 public:
264  static char ID; // Pass identification, replacement for typeid
265 
266  MemCpyOptLegacyPass() : FunctionPass(ID) {
267    initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
268  }
269 
270  bool runOnFunction(Function &F) override;
271 
272 private:
273  // This transformation requires dominator and post-dominator info.
274  void getAnalysisUsage(AnalysisUsage &AU) const override {
275  AU.setPreservesCFG();
285  }
286 };
287 
288 } // end anonymous namespace
289 
290 char MemCpyOptLegacyPass::ID = 0;
291 
292 /// The public interface to this file...
293 FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
294 
295 INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
296  false, false)
303 INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
304                     false, false)
305 
306 // Check that V is either not accessible by the caller, or unwinding cannot
307 // occur between Start and End.
308 static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
309                                          Instruction *End) {
310  assert(Start->getParent() == End->getParent() && "Must be in same block");
311  // Function can't unwind, so it also can't be visible through unwinding.
312  if (Start->getFunction()->doesNotThrow())
313  return false;
314 
315  // Object is not visible on unwind.
316  // TODO: Support RequiresNoCaptureBeforeUnwind case.
317  bool RequiresNoCaptureBeforeUnwind;
318  if (isNotVisibleOnUnwind(V,
319                           RequiresNoCaptureBeforeUnwind) &&
320  !RequiresNoCaptureBeforeUnwind)
321  return false;
322 
323  // Check whether there are any unwinding instructions in the range.
324  return any_of(make_range(Start->getIterator(), End->getIterator()),
325  [](const Instruction &I) { return I.mayThrow(); });
326 }
327 
328 void MemCpyOptPass::eraseInstruction(Instruction *I) {
329  MSSAU->removeMemoryAccess(I);
330  I->eraseFromParent();
331 }
332 
333 // Check for mod or ref of Loc between Start and End, excluding both boundaries.
334 // Start and End must be in the same block.
335 // If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
336 // intrinsic and store it inside SkippedLifetimeStart.
337 static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
338                             const MemoryUseOrDef *Start,
339  const MemoryUseOrDef *End,
340  Instruction **SkippedLifetimeStart = nullptr) {
341  assert(Start->getBlock() == End->getBlock() && "Only local supported");
342  for (const MemoryAccess &MA :
343  make_range(++Start->getIterator(), End->getIterator())) {
344  Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
345  if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
346  auto *II = dyn_cast<IntrinsicInst>(I);
347  if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
348  SkippedLifetimeStart && !*SkippedLifetimeStart) {
349  *SkippedLifetimeStart = I;
350  continue;
351  }
352 
353  return true;
354  }
355  }
356  return false;
357 }
358 
359 // Check for mod of Loc between Start and End, excluding both boundaries.
360 // Start and End can be in different blocks.
361 static bool writtenBetween(MemorySSA *MSSA, AliasAnalysis &AA,
362  MemoryLocation Loc, const MemoryUseOrDef *Start,
363  const MemoryUseOrDef *End) {
364  if (isa<MemoryUse>(End)) {
365  // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
366  // Manually check read accesses between Start and End, if they are in the
367  // same block, for clobbers. Otherwise assume Loc is clobbered.
368  return Start->getBlock() != End->getBlock() ||
369  any_of(
370  make_range(std::next(Start->getIterator()), End->getIterator()),
371  [&AA, Loc](const MemoryAccess &Acc) {
372  if (isa<MemoryUse>(&Acc))
373  return false;
374  Instruction *AccInst =
375  cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
376  return isModSet(AA.getModRefInfo(AccInst, Loc));
377  });
378  }
379 
380  // TODO: Only walk until we hit Start.
381  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
382      End->getDefiningAccess(), Loc);
383  return !MSSA->dominates(Clobber, Start);
384 }
385 
386 /// When scanning forward over instructions, we look for some other patterns to
387 /// fold away. In particular, this looks for stores to neighboring locations of
388 /// memory. If it sees enough consecutive ones, it attempts to merge them
389 /// together into a memcpy/memset.
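/// For illustration only (hypothetical IR, not taken from this file): four
/// adjacent byte stores such as
///   store i8 0, i8* %p
///   store i8 0, i8* %p1      ; %p1 = getelementptr i8, i8* %p, i64 1
///   store i8 0, i8* %p2
///   store i8 0, i8* %p3
/// can be collapsed, once the range is considered profitable, into
///   call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4, i1 false)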
390 Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
391  Value *StartPtr,
392  Value *ByteVal) {
393  const DataLayout &DL = StartInst->getModule()->getDataLayout();
394 
395  // We can't track scalable types
396  if (auto *SI = dyn_cast<StoreInst>(StartInst))
397  if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
398  return nullptr;
399 
400  // Okay, so we now have a single store that can be splatable. Scan to find
401  // all subsequent stores of the same value to offset from the same pointer.
402  // Join these together into ranges, so we can decide whether contiguous blocks
403  // are stored.
404  MemsetRanges Ranges(DL);
405 
406  BasicBlock::iterator BI(StartInst);
407 
408  // Keeps track of the last memory use or def before the insertion point for
409  // the new memset. The new MemoryDef for the inserted memsets will be inserted
410  // after MemInsertPoint. It points to either LastMemDef or to the last user
411  // before the insertion point of the memset, if there are any such users.
412  MemoryUseOrDef *MemInsertPoint = nullptr;
413  // Keeps track of the last MemoryDef between StartInst and the insertion point
414  // for the new memset. This will become the defining access of the inserted
415  // memsets.
416  MemoryDef *LastMemDef = nullptr;
417  for (++BI; !BI->isTerminator(); ++BI) {
418  auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
419  MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
420  if (CurrentAcc) {
421  MemInsertPoint = CurrentAcc;
422  if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
423  LastMemDef = CurrentDef;
424  }
425 
426  // Calls that only access inaccessible memory do not block merging
427  // accessible stores.
428  if (auto *CB = dyn_cast<CallBase>(BI)) {
429  if (CB->onlyAccessesInaccessibleMemory())
430  continue;
431  }
432 
433  if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
434  // If the instruction is readnone, ignore it, otherwise bail out. We
435  // don't even allow readonly here because we don't want something like:
436  // A[1] = 2; strlen(A); A[2] = 2; -> memset(A, ...); strlen(A).
437  if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
438  break;
439  continue;
440  }
441 
442  if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
443  // If this is a store, see if we can merge it in.
444  if (!NextStore->isSimple()) break;
445 
446  Value *StoredVal = NextStore->getValueOperand();
447 
448  // Don't convert stores of non-integral pointer types to memsets (which
449  // stores integers).
450  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
451  break;
452 
453  // We can't track ranges involving scalable types.
454  if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
455  break;
456 
457  // Check to see if this stored value is the same byte-splattable value.
458  Value *StoredByte = isBytewiseValue(StoredVal, DL);
459  if (isa<UndefValue>(ByteVal) && StoredByte)
460  ByteVal = StoredByte;
461  if (ByteVal != StoredByte)
462  break;
463 
464  // Check to see if this store is to a constant offset from the start ptr.
465  Optional<int64_t> Offset =
466      isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL);
467  if (!Offset)
468  break;
469 
470  Ranges.addStore(*Offset, NextStore);
471  } else {
472  auto *MSI = cast<MemSetInst>(BI);
473 
474  if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
475  !isa<ConstantInt>(MSI->getLength()))
476  break;
477 
478  // Check to see if this store is to a constant offset from the start ptr.
479  Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), DL);
480  if (!Offset)
481  break;
482 
483  Ranges.addMemSet(*Offset, MSI);
484  }
485  }
486 
487  // If we have no ranges, then we just had a single store with nothing that
488  // could be merged in. This is a very common case of course.
489  if (Ranges.empty())
490  return nullptr;
491 
492  // If we had at least one store that could be merged in, add the starting
493  // store as well. We try to avoid this unless there is at least something
494  // interesting as a small compile-time optimization.
495  Ranges.addInst(0, StartInst);
496 
497  // If we create any memsets, we put it right before the first instruction that
498  // isn't part of the memset block. This ensures that the memset is dominated
499  // by any addressing instruction needed by the start of the block.
500  IRBuilder<> Builder(&*BI);
501 
502  // Now that we have full information about ranges, loop over the ranges and
503  // emit memset's for anything big enough to be worthwhile.
504  Instruction *AMemSet = nullptr;
505  for (const MemsetRange &Range : Ranges) {
506  if (Range.TheStores.size() == 1) continue;
507 
508  // If it is profitable to lower this range to memset, do so now.
509  if (!Range.isProfitableToUseMemset(DL))
510  continue;
511 
512  // Otherwise, we do want to transform this! Create a new memset.
513  // Get the starting pointer of the block.
514  StartPtr = Range.StartPtr;
515 
516  AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
517  Range.Alignment);
518  AMemSet->mergeDIAssignID(Range.TheStores);
519 
520  LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
521  : Range.TheStores) dbgs()
522  << *SI << '\n';
523  dbgs() << "With: " << *AMemSet << '\n');
524  if (!Range.TheStores.empty())
525  AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
526 
527  assert(LastMemDef && MemInsertPoint &&
528  "Both LastMemDef and MemInsertPoint need to be set");
529  auto *NewDef =
530  cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
531  ? MSSAU->createMemoryAccessBefore(
532  AMemSet, LastMemDef, MemInsertPoint)
533  : MSSAU->createMemoryAccessAfter(
534  AMemSet, LastMemDef, MemInsertPoint));
535  MSSAU->insertDef(NewDef, /*RenameUses=*/true);
536  LastMemDef = NewDef;
537  MemInsertPoint = NewDef;
538 
539  // Zap all the stores.
540  for (Instruction *SI : Range.TheStores)
541  eraseInstruction(SI);
542 
543  ++NumMemSetInfer;
544  }
545 
546  return AMemSet;
547 }
548 
549 // This method tries to lift a store instruction before position P.
550 // It will lift the store and its arguments, plus anything else that
551 // may alias with them.
552 // The method returns true if it was successful.
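// For illustration only (hypothetical scenario, not from the original
// comments): if the store's address is computed by a GEP that sits between
// P and the store, that GEP (and anything it depends on) must be hoisted
// above P as well, or the promotion to memcpy at P would use a value that
// is not yet available there.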
553 bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
554  // If the store aliases this position, bail out early.
555  MemoryLocation StoreLoc = MemoryLocation::get(SI);
556  if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
557  return false;
558 
559  // Keep track of the arguments of all instructions we plan to lift
560  // so we can make sure to lift them as well if appropriate.
561  DenseSet<Instruction *> Args;
562  auto AddArg = [&](Value *Arg) {
563  auto *I = dyn_cast<Instruction>(Arg);
564  if (I && I->getParent() == SI->getParent()) {
565  // Cannot hoist user of P above P
566  if (I == P) return false;
567  Args.insert(I);
568  }
569  return true;
570  };
571  if (!AddArg(SI->getPointerOperand()))
572  return false;
573 
574  // Instructions to lift before P.
575  SmallVector<Instruction *, 8> ToLift{SI};
576 
577  // Memory locations of lifted instructions.
578  SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
579 
580  // Lifted calls.
581  SmallVector<const CallBase *, 8> Calls;
582 
583  const MemoryLocation LoadLoc = MemoryLocation::get(LI);
584 
585  for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
586  auto *C = &*I;
587 
588  // Make sure hoisting does not perform a store that was not guaranteed to
589  // happen.
590  if (!isGuaranteedToTransferExecutionToSuccessor(C))
591      return false;
592 
593  bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None));
594 
595  bool NeedLift = false;
596  if (Args.erase(C))
597  NeedLift = true;
598  else if (MayAlias) {
599  NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
600  return isModOrRefSet(AA->getModRefInfo(C, ML));
601  });
602 
603  if (!NeedLift)
604  NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
605  return isModOrRefSet(AA->getModRefInfo(C, Call));
606  });
607  }
608 
609  if (!NeedLift)
610  continue;
611 
612  if (MayAlias) {
613  // Since LI is implicitly moved downwards past the lifted instructions,
614  // none of them may modify its source.
615  if (isModSet(AA->getModRefInfo(C, LoadLoc)))
616  return false;
617  else if (const auto *Call = dyn_cast<CallBase>(C)) {
618  // If we can't lift this before P, it's game over.
619  if (isModOrRefSet(AA->getModRefInfo(P, Call)))
620  return false;
621 
622  Calls.push_back(Call);
623  } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
624  // If we can't lift this before P, it's game over.
625  auto ML = MemoryLocation::get(C);
626  if (isModOrRefSet(AA->getModRefInfo(P, ML)))
627  return false;
628 
629  MemLocs.push_back(ML);
630  } else
631  // We don't know how to lift this instruction.
632  return false;
633  }
634 
635  ToLift.push_back(C);
636  for (Value *Op : C->operands())
637  if (!AddArg(Op))
638  return false;
639  }
640 
641  // Find MSSA insertion point. Normally P will always have a corresponding
642  // memory access before which we can insert. However, with non-standard AA
643  // pipelines, there may be a mismatch between AA and MSSA, in which case we
644  // will scan for a memory access before P. In either case, we know for sure
645  // that at least the load will have a memory access.
646  // TODO: Simplify this once P will be determined by MSSA, in which case the
647  // discrepancy can no longer occur.
648  MemoryUseOrDef *MemInsertPoint = nullptr;
649  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
650  MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
651  } else {
652  const Instruction *ConstP = P;
653  for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
654  ++LI->getReverseIterator())) {
655  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
656  MemInsertPoint = MA;
657  break;
658  }
659  }
660  }
661 
662  // We made it, we need to lift.
663  for (auto *I : llvm::reverse(ToLift)) {
664  LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
665  I->moveBefore(P);
666  assert(MemInsertPoint && "Must have found insert point");
667  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
668  MSSAU->moveAfter(MA, MemInsertPoint);
669  MemInsertPoint = MA;
670  }
671  }
672 
673  return true;
674 }
675 
676 bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
677  if (!SI->isSimple()) return false;
678 
679  // Avoid merging nontemporal stores since the resulting
680  // memcpy/memset would not be able to preserve the nontemporal hint.
681  // In theory we could teach how to propagate the !nontemporal metadata to
682  // memset calls. However, that change would force the backend to
683  // conservatively expand !nontemporal memset calls back to sequences of
684  // store instructions (effectively undoing the merging).
685  if (SI->getMetadata(LLVMContext::MD_nontemporal))
686  return false;
687 
688  const DataLayout &DL = SI->getModule()->getDataLayout();
689 
690  Value *StoredVal = SI->getValueOperand();
691 
692  // Not all the transforms below are correct for non-integral pointers, bail
693  // until we've audited the individual pieces.
694  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
695  return false;
696 
697  // Load to store forwarding can be interpreted as memcpy.
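  // For example (illustrative IR, not from the original source):
  //   %v = load %struct.T, %struct.T* %src
  //   store %struct.T %v, %struct.T* %dst
  // can become a memcpy (or a memmove, if the two locations may alias) of
  // sizeof(%struct.T) bytes from %src to %dst.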
698  if (auto *LI = dyn_cast<LoadInst>(StoredVal)) {
699  if (LI->isSimple() && LI->hasOneUse() &&
700  LI->getParent() == SI->getParent()) {
701 
702  auto *T = LI->getType();
703  // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
704  // the corresponding libcalls are not available.
705  // TODO: We should really distinguish between libcall availability and
706  // our ability to introduce intrinsics.
707  if (T->isAggregateType() &&
708      (EnableMemCpyOptWithoutLibcalls ||
709       (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
710  MemoryLocation LoadLoc = MemoryLocation::get(LI);
711 
712  // We use alias analysis to check if an instruction may store to
713  // the memory we load from in between the load and the store. If
714  // such an instruction is found, we try to promote there instead
715  // of at the store position.
716  // TODO: Can use MSSA for this.
717  Instruction *P = SI;
718  for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
719  if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
720  P = &I;
721  break;
722  }
723  }
724 
725  // We found an instruction that may write to the loaded memory.
726  // We can try to promote at this position instead of the store
727  // position if nothing aliases the store memory after this and the store
728  // destination is not in the range.
729  if (P && P != SI) {
730  if (!moveUp(SI, P, LI))
731  P = nullptr;
732  }
733 
734  // If a valid insertion position is found, then we can promote
735  // the load/store pair to a memcpy.
736  if (P) {
737  // If we load from memory that may alias the memory we store to,
738  // memmove must be used to preserve semantics. If not, memcpy can
739  // be used. Also, if we load from constant memory, memcpy can be used
740  // as the constant memory won't be modified.
741  bool UseMemMove = false;
742  if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
743  UseMemMove = true;
744 
745  uint64_t Size = DL.getTypeStoreSize(T);
746 
747  IRBuilder<> Builder(P);
748  Instruction *M;
749  if (UseMemMove)
750  M = Builder.CreateMemMove(
751  SI->getPointerOperand(), SI->getAlign(),
752  LI->getPointerOperand(), LI->getAlign(), Size);
753  else
754  M = Builder.CreateMemCpy(
755  SI->getPointerOperand(), SI->getAlign(),
756  LI->getPointerOperand(), LI->getAlign(), Size);
757  M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
758 
759  LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
760  << *M << "\n");
761 
762  auto *LastDef =
763  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
764  auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
765  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
766 
767  eraseInstruction(SI);
768  eraseInstruction(LI);
769  ++NumMemCpyInstr;
770 
771  // Make sure we do not invalidate the iterator.
772  BBI = M->getIterator();
773  return true;
774  }
775  }
776 
777  // Detect cases where we're performing call slot forwarding, but
778  // happen to be using a load-store pair to implement it, rather than
779  // a memcpy.
780  auto GetCall = [&]() -> CallInst * {
781  // We defer this expensive clobber walk until the cheap checks
782  // have been done on the source inside performCallSlotOptzn.
783  if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
784  MSSA->getWalker()->getClobberingMemoryAccess(LI)))
785  return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
786  return nullptr;
787  };
788 
789  bool changed = performCallSlotOptzn(
790  LI, SI, SI->getPointerOperand()->stripPointerCasts(),
791      LI->getPointerOperand()->stripPointerCasts(),
792      DL.getTypeStoreSize(SI->getOperand(0)->getType()),
793  std::min(SI->getAlign(), LI->getAlign()), GetCall);
794  if (changed) {
795  eraseInstruction(SI);
796  eraseInstruction(LI);
797  ++NumMemCpyInstr;
798  return true;
799  }
800  }
801  }
802 
803  // The following code creates memset intrinsics out of thin air. Don't do
804  // this if the corresponding libfunc is not available.
805  // TODO: We should really distinguish between libcall availability and
806  // our ability to introduce intrinsics.
807  if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
808  return false;
809 
810  // There are two cases that are interesting for this code to handle: memcpy
811  // and memset. Right now we only handle memset.
812 
813  // Ensure that the value being stored is something that can be memset a
814  // byte at a time, like "0" or "-1" of any width, as well as things like
815  // 0xA0A0A0A0 and 0.0.
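  // For example (illustrative, not from the original comments): a store of
  // the i32 constant 0xA0A0A0A0 is byte-splattable, so isBytewiseValue
  // returns the i8 value 0xA0, which can seed a memset.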
816  auto *V = SI->getOperand(0);
817  if (Value *ByteVal = isBytewiseValue(V, DL)) {
818  if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
819  ByteVal)) {
820  BBI = I->getIterator(); // Don't invalidate iterator.
821  return true;
822  }
823 
824  // If we have an aggregate, we try to promote it to memset regardless
825  // of opportunity for merging as it can expose optimization opportunities
826  // in subsequent passes.
827  auto *T = V->getType();
828  if (T->isAggregateType()) {
829  uint64_t Size = DL.getTypeStoreSize(T);
830  IRBuilder<> Builder(SI);
831  auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
832  SI->getAlign());
833  M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
834 
835  LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
836 
837  // The newly inserted memset is immediately overwritten by the original
838  // store, so we do not need to rename uses.
839  auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
840  auto *NewAccess = MSSAU->createMemoryAccessBefore(
841  M, StoreDef->getDefiningAccess(), StoreDef);
842  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
843 
844  eraseInstruction(SI);
845  NumMemSetInfer++;
846 
847  // Make sure we do not invalidate the iterator.
848  BBI = M->getIterator();
849  return true;
850  }
851  }
852 
853  return false;
854 }
855 
856 bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
857  // See if there is another memset or store neighboring this memset which
858  // allows us to widen out the memset to do a single larger store.
859  if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
860  if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
861  MSI->getValue())) {
862  BBI = I->getIterator(); // Don't invalidate iterator.
863  return true;
864  }
865  return false;
866 }
867 
868 /// Takes a memcpy and a call that it depends on,
869 /// and checks for the possibility of a call slot optimization by having
870 /// the call write its result directly into the destination of the memcpy.
871 bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
872  Instruction *cpyStore, Value *cpyDest,
873  Value *cpySrc, TypeSize cpySize,
874  Align cpyAlign,
875  std::function<CallInst *()> GetC) {
876  // The general transformation to keep in mind is
877  //
878  // call @func(..., src, ...)
879  // memcpy(dest, src, ...)
880  //
881  // ->
882  //
883  // memcpy(dest, src, ...)
884  // call @func(..., dest, ...)
885  //
886  // Since moving the memcpy is technically awkward, we additionally check that
887  // src only holds uninitialized values at the moment of the call, meaning that
888  // the memcpy can be discarded rather than moved.
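  // A typical instance (illustrative, not from the original comments): a call
  // that writes its result into a local temporary which is then copied into
  // the real destination, e.g.
  //   call void @func(%T* sret(%T) %tmp)
  //   memcpy(%dst <- %tmp, sizeof(%T))
  // can instead have @func write directly into %dst, making the temporary and
  // the memcpy dead.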
889 
890  // We can't optimize scalable types.
891  if (cpySize.isScalable())
892  return false;
893 
894  // Require that src be an alloca. This simplifies the reasoning considerably.
895  auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
896  if (!srcAlloca)
897  return false;
898 
899  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
900  if (!srcArraySize)
901  return false;
902 
903  const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
904  uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
905  srcArraySize->getZExtValue();
906 
907  if (cpySize < srcSize)
908  return false;
909 
910  CallInst *C = GetC();
911  if (!C)
912  return false;
913 
914  // Lifetime marks shouldn't be operated on.
915  if (Function *F = C->getCalledFunction())
916  if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
917  return false;
918 
919 
920  if (C->getParent() != cpyStore->getParent()) {
921  LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
922  return false;
923  }
924 
925  MemoryLocation DestLoc = isa<StoreInst>(cpyStore) ?
926  MemoryLocation::get(cpyStore) :
927  MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
928 
929  // Check that nothing touches the dest of the copy between
930  // the call and the store/memcpy.
931  Instruction *SkippedLifetimeStart = nullptr;
932  if (accessedBetween(*AA, DestLoc, MSSA->getMemoryAccess(C),
933  MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
934  LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
935  return false;
936  }
937 
938  // If we need to move a lifetime.start above the call, make sure that we can
939  // actually do so. If the argument is bitcasted for example, we would have to
940  // move the bitcast as well, which we don't handle.
941  if (SkippedLifetimeStart) {
942  auto *LifetimeArg =
943  dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
944  if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
945  C->comesBefore(LifetimeArg))
946  return false;
947  }
948 
949  // Check that accessing the first srcSize bytes of dest will not cause a
950  // trap. Otherwise the transform is invalid since it might cause a trap
951  // to occur earlier than it otherwise would.
952  if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
953  DL, C, AC, DT)) {
954  LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
955  return false;
956  }
957 
958  // Make sure that nothing can observe cpyDest being written early. There are
959  // a number of cases to consider:
960  // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
961  // the transform.
962  // 2. C itself may not access cpyDest (prior to the transform). This is
963  // checked further below.
964  // 3. If cpyDest is accessible to the caller of this function (potentially
965  // captured and not based on an alloca), we need to ensure that we cannot
966  // unwind between C and cpyStore. This is checked here.
967  // 4. If cpyDest is potentially captured, there may be accesses to it from
968  // another thread. In this case, we need to check that cpyStore is
969  // guaranteed to be executed if C is. As it is a non-atomic access, it
970  // renders accesses from other threads undefined.
971  // TODO: This is currently not checked.
972  if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
973  LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
974  return false;
975  }
976 
977  // Check that dest points to memory that is at least as aligned as src.
978  Align srcAlign = srcAlloca->getAlign();
979  bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
980  // If dest is not aligned enough and we can't increase its alignment then
981  // bail out.
982  if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
983  LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
984  return false;
985  }
986 
987  // Check that src is not accessed except via the call and the memcpy. This
988  // guarantees that it holds only undefined values when passed in (so the final
989  // memcpy can be dropped), that it is not read or written between the call and
990  // the memcpy, and that writing beyond the end of it is undefined.
991  SmallVector<User *, 8> srcUseList(srcAlloca->users());
992  while (!srcUseList.empty()) {
993  User *U = srcUseList.pop_back_val();
994 
995  if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
996  append_range(srcUseList, U->users());
997  continue;
998  }
999  if (const auto *G = dyn_cast<GetElementPtrInst>(U)) {
1000  if (!G->hasAllZeroIndices())
1001  return false;
1002 
1003  append_range(srcUseList, U->users());
1004  continue;
1005  }
1006  if (const auto *IT = dyn_cast<IntrinsicInst>(U))
1007  if (IT->isLifetimeStartOrEnd())
1008  continue;
1009 
1010  if (U != C && U != cpyLoad)
1011  return false;
1012  }
1013 
1014  // Check whether src is captured by the called function, in which case there
1015  // may be further indirect uses of src.
1016  bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
1017  return U->stripPointerCasts() == cpySrc &&
1018  !C->doesNotCapture(C->getArgOperandNo(&U));
1019  });
1020 
1021  // If src is captured, then check whether there are any potential uses of
1022  // src through the captured pointer before the lifetime of src ends, either
1023  // due to a lifetime.end or a return from the function.
1024  if (SrcIsCaptured) {
1025  // Check that dest is not captured before/at the call. We have already
1026  // checked that src is not captured before it. If either had been captured,
1027  // then the call might be comparing the argument against the captured dest
1028  // or src pointer.
1029  Value *DestObj = getUnderlyingObject(cpyDest);
1030  if (!isIdentifiedFunctionLocal(DestObj) ||
1031  PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
1032  /* StoreCaptures */ true, C, DT,
1033  /* IncludeI */ true))
1034  return false;
1035 
1036  MemoryLocation SrcLoc =
1037  MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
1038  for (Instruction &I :
1039  make_range(++C->getIterator(), C->getParent()->end())) {
1040  // Lifetime of srcAlloca ends at lifetime.end.
1041  if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1042  if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
1043  II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
1044  cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
1045  break;
1046  }
1047 
1048  // Lifetime of srcAlloca ends at return.
1049  if (isa<ReturnInst>(&I))
1050  break;
1051 
1052  // Ignore the direct read of src in the load.
1053  if (&I == cpyLoad)
1054  continue;
1055 
1056  // Check whether this instruction may mod/ref src through the captured
1057  // pointer (we have already checked for any direct mod/refs in the loop above).
1058  // Also bail if we hit a terminator, as we don't want to scan into other
1059  // blocks.
1060  if (isModOrRefSet(AA->getModRefInfo(&I, SrcLoc)) || I.isTerminator())
1061  return false;
1062  }
1063  }
1064 
1065  // Since we're changing the parameter to the callsite, we need to make sure
1066  // that what would be the new parameter dominates the callsite.
1067  if (!DT->dominates(cpyDest, C)) {
1068  // Support moving a constant index GEP before the call.
1069  auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1070  if (GEP && GEP->hasAllConstantIndices() &&
1071  DT->dominates(GEP->getPointerOperand(), C))
1072  GEP->moveBefore(C);
1073  else
1074  return false;
1075  }
1076 
1077  // In addition to knowing that the call does not access src in some
1078  // unexpected manner, for example via a global, which we deduce from
1079  // the use analysis, we also need to know that it does not sneakily
1080  // access dest. We rely on AA to figure this out for us.
1081  ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
1082  // If necessary, perform additional analysis.
1083  if (isModOrRefSet(MR))
1084  MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT);
1085  if (isModOrRefSet(MR))
1086  return false;
1087 
1088  // We can't create address space casts here because we don't know if they're
1089  // safe for the target.
1090  if (cpySrc->getType()->getPointerAddressSpace() !=
1091  cpyDest->getType()->getPointerAddressSpace())
1092  return false;
1093  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1094  if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1095  cpySrc->getType()->getPointerAddressSpace() !=
1096  C->getArgOperand(ArgI)->getType()->getPointerAddressSpace())
1097  return false;
1098 
1099  // All the checks have passed, so do the transformation.
1100  bool changedArgument = false;
1101  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1102  if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1103  Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
1104  : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
1105  cpyDest->getName(), C);
1106  changedArgument = true;
1107  if (C->getArgOperand(ArgI)->getType() == Dest->getType())
1108  C->setArgOperand(ArgI, Dest);
1109  else
1110  C->setArgOperand(ArgI, CastInst::CreatePointerCast(
1111  Dest, C->getArgOperand(ArgI)->getType(),
1112  Dest->getName(), C));
1113  }
1114 
1115  if (!changedArgument)
1116  return false;
1117 
1118  // If the destination wasn't sufficiently aligned then increase its alignment.
1119  if (!isDestSufficientlyAligned) {
1120  assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1121  cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1122  }
1123 
1124  if (SkippedLifetimeStart) {
1125  SkippedLifetimeStart->moveBefore(C);
1126  MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
1127  MSSA->getMemoryAccess(C));
1128  }
1129 
1130  // Update AA metadata
1131  // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
1132  // handled here, but combineMetadata doesn't support them yet
1133  unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1134  LLVMContext::MD_noalias,
1135  LLVMContext::MD_invariant_group,
1136  LLVMContext::MD_access_group};
1137  combineMetadata(C, cpyLoad, KnownIDs, true);
1138  if (cpyLoad != cpyStore)
1139  combineMetadata(C, cpyStore, KnownIDs, true);
1140 
1141  ++NumCallSlot;
1142  return true;
1143 }
1144 
1145 /// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1146 /// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
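/// For illustration only (hypothetical, not from the original comments):
///   memcpy(b <- a, n)
///   memcpy(c <- b, n)
/// is rewritten so the second copy reads directly from the original source:
///   memcpy(b <- a, n)
///   memcpy(c <- a, n)
/// which leaves the first copy dead if 'b' has no other uses, so DSE can
/// later remove it.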
1147 bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1148  MemCpyInst *MDep) {
1149  // We can only transform memcpys where the dest of one is the source of the
1150  // other.
1151  if (M->getSource() != MDep->getDest() || MDep->isVolatile())
1152  return false;
1153 
1154  // If the dep instruction is reading from our current input, then it is a noop
1155  // transfer and substituting the input won't change this instruction. Just
1156  // ignore the input and let someone else zap MDep. This handles cases like:
1157  // memcpy(a <- a)
1158  // memcpy(b <- a)
1159  if (M->getSource() == MDep->getSource())
1160  return false;
1161 
1162  // Second, the lengths of the memcpys must be the same, or the preceding one
1163  // must be larger than the following one.
1164  if (MDep->getLength() != M->getLength()) {
1165  auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1166  auto *MLen = dyn_cast<ConstantInt>(M->getLength());
1167  if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
1168  return false;
1169  }
1170 
1171  // Verify that the copied-from memory doesn't change in between the two
1172  // transfers. For example, in:
1173  // memcpy(a <- b)
1174  // *b = 42;
1175  // memcpy(c <- a)
1176  // It would be invalid to transform the second memcpy into memcpy(c <- b).
1177  //
1178  // TODO: If the code between M and MDep is transparent to the destination "c",
1179  // then we could still perform the xform by moving M up to the first memcpy.
1180  // TODO: It would be sufficient to check the MDep source up to the memcpy
1181  // size of M, rather than MDep.
1182  if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep),
1183  MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
1184  return false;
1185 
1186  // If the dest of the second might alias the source of the first, then the
1187  // source and dest might overlap. In addition, if the source of the first
1188  // points to constant memory, they won't overlap by definition. Otherwise, we
1189  // still want to eliminate the intermediate value, but we have to generate a
1190  // memmove instead of memcpy.
1191  bool UseMemMove = false;
1192  if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(MDep))))
1193    UseMemMove = true;
1194 
1195  // If all checks passed, then we can transform M.
1196  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1197  << *MDep << '\n' << *M << '\n');
1198 
1199  // TODO: Is this worth it if we're creating a less aligned memcpy? For
1200  // example we could be moving from movaps -> movq on x86.
1201  IRBuilder<> Builder(M);
1202  Instruction *NewM;
1203  if (UseMemMove)
1204  NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
1205  MDep->getRawSource(), MDep->getSourceAlign(),
1206  M->getLength(), M->isVolatile());
1207  else if (isa<MemCpyInlineInst>(M)) {
1208  // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1209  // never allowed since that would allow the latter to be lowered as a call
1210  // to an external function.
1211  NewM = Builder.CreateMemCpyInline(
1212  M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
1213  MDep->getSourceAlign(), M->getLength(), M->isVolatile());
1214  } else
1215  NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
1216  MDep->getRawSource(), MDep->getSourceAlign(),
1217  M->getLength(), M->isVolatile());
1218  NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
1219 
1220  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
1221  auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1222  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1223  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1224 
1225  // Remove the instruction we're replacing.
1226  eraseInstruction(M);
1227  ++NumMemCpyInstr;
1228  return true;
1229 }
1230 
1231 /// We've found that the (upward scanning) memory dependence of \p MemCpy is
1232 /// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1233 /// weren't copied over by \p MemCpy.
1234 ///
1235 /// In other words, transform:
1236 /// \code
1237 /// memset(dst, c, dst_size);
1238 /// memcpy(dst, src, src_size);
1239 /// \endcode
1240 /// into:
1241 /// \code
1242 /// memcpy(dst, src, src_size);
1243 /// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1244 /// \endcode
1245 bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1246  MemSetInst *MemSet) {
1247  // We can only transform memset/memcpy with the same destination.
1248  if (!AA->isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1249  return false;
1250 
1251  // Check that src and dst of the memcpy aren't the same. While memcpy
1252  // operands cannot partially overlap, exact equality is allowed.
1253  if (isModSet(AA->getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1254  return false;
1255 
1256  // We know that dst up to src_size is not written. We now need to make sure
1257  // that dst up to dst_size is not accessed. (If we did not move the memset,
1258  // checking for reads would be sufficient.)
1259  if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
1260                      MSSA->getMemoryAccess(MemSet),
1261  MSSA->getMemoryAccess(MemCpy)))
1262  return false;
1263 
1264  // Use the same i8* dest as the memcpy, killing the memset dest if different.
1265  Value *Dest = MemCpy->getRawDest();
1266  Value *DestSize = MemSet->getLength();
1267  Value *SrcSize = MemCpy->getLength();
1268 
1269  if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1270  return false;
1271 
1272  // If the sizes are the same, simply drop the memset instead of generating
1273  // a replacement with zero size.
1274  if (DestSize == SrcSize) {
1275  eraseInstruction(MemSet);
1276  return true;
1277  }
1278 
1279  // By default, create an unaligned memset.
1280  Align Alignment = Align(1);
1281  // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1282  // of the sum.
1283  const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
1284  MemCpy->getDestAlign().valueOrOne());
1285  if (DestAlign > 1)
1286  if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1287  Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
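  // Worked example (illustrative, not from the original comments): if the
  // destination is known 8-byte aligned and SrcSize is the constant 20, the
  // new memset starts at Dest + 20, which is only guaranteed to be 4-byte
  // aligned; commonAlignment(8, 20) indeed yields 4.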
1288 
1289  IRBuilder<> Builder(MemCpy);
1290 
1291  // If the sizes have different types, zext the smaller one.
1292  if (DestSize->getType() != SrcSize->getType()) {
1293  if (DestSize->getType()->getIntegerBitWidth() >
1294  SrcSize->getType()->getIntegerBitWidth())
1295  SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1296  else
1297  DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1298  }
1299 
1300  Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1301  Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1302  Value *MemsetLen = Builder.CreateSelect(
1303  Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1304  unsigned DestAS = Dest->getType()->getPointerAddressSpace();
1305  Instruction *NewMemSet = Builder.CreateMemSet(
1306  Builder.CreateGEP(
1307  Builder.getInt8Ty(),
1308  Builder.CreatePointerCast(Dest, Builder.getInt8PtrTy(DestAS)),
1309  SrcSize),
1310  MemSet->getOperand(1), MemsetLen, Alignment);
1311 
1312  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
1313  "MemCpy must be a MemoryDef");
1314  // The new memset is inserted after the memcpy, but it is known that its
1315  // defining access is the memset about to be removed which immediately
1316  // precedes the memcpy.
1317  auto *LastDef =
1318  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1319  auto *NewAccess = MSSAU->createMemoryAccessBefore(
1320  NewMemSet, LastDef->getDefiningAccess(), LastDef);
1321  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1322 
1323  eraseInstruction(MemSet);
1324  return true;
1325 }
1326 
1327 /// Determine whether the instruction has undefined content for the given Size,
1328 /// either because it was freshly alloca'd or started its lifetime.
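/// For illustration only (hypothetical, not from the original comments): a
/// memcpy whose source is a just-created alloca with no intervening stores,
/// or an alloca whose lifetime.start covers the whole object, copies
/// undefined bytes, so the caller can drop the copy entirely.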
1329 static bool hasUndefContents(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
1330  MemoryDef *Def, Value *Size) {
1331  if (MSSA->isLiveOnEntryDef(Def))
1332  return isa<AllocaInst>(getUnderlyingObject(V));
1333 
1334  if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1335  if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1336  auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1337 
1338  if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1339  if (AA->isMustAlias(V, II->getArgOperand(1)) &&
1340  LTSize->getZExtValue() >= CSize->getZExtValue())
1341  return true;
1342  }
1343 
1344  // If the lifetime.start covers a whole alloca (as it almost always
1345  // does) and we're querying a pointer based on that alloca, then we know
1346  // the memory is definitely undef, regardless of how exactly we alias.
1347  // The size also doesn't matter, as an out-of-bounds access would be UB.
1348  if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
1349  if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1350  const DataLayout &DL = Alloca->getModule()->getDataLayout();
1351  if (Optional<TypeSize> AllocaSize =
1352  Alloca->getAllocationSizeInBits(DL))
1353  if (*AllocaSize == LTSize->getValue() * 8)
1354  return true;
1355  }
1356  }
1357  }
1358  }
1359 
1360  return false;
1361 }
1362 
1363 /// Transform memcpy to memset when its source was just memset.
1364 /// In other words, turn:
1365 /// \code
1366 /// memset(dst1, c, dst1_size);
1367 /// memcpy(dst2, dst1, dst2_size);
1368 /// \endcode
1369 /// into:
1370 /// \code
1371 /// memset(dst1, c, dst1_size);
1372 /// memset(dst2, c, dst2_size);
1373 /// \endcode
1374 /// When dst2_size <= dst1_size.
1375 bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1376  MemSetInst *MemSet) {
1377  // Make sure we have memcpy(..., memset(...), ...); that is, we are memsetting
1378  // and memcpying from the same address. Otherwise it is hard to reason about.
1379  if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1380  return false;
1381 
1382  Value *MemSetSize = MemSet->getLength();
1383  Value *CopySize = MemCpy->getLength();
1384 
1385  if (MemSetSize != CopySize) {
1386  // Make sure the memcpy doesn't read any more than what the memset wrote.
1387  // Don't worry about sizes larger than i64.
1388 
1389  // A known memset size is required.
1390  auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1391  if (!CMemSetSize)
1392  return false;
1393 
1394  // A known memcpy size is also required.
1395  auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1396  if (!CCopySize)
1397  return false;
1398  if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1399  // If the memcpy is larger than the memset, but the memory was undef prior
1400  // to the memset, we can just ignore the tail. Technically we're only
1401  // interested in the bytes from MemSetSize..CopySize here, but as we can't
1402  // easily represent this location, we use the full 0..CopySize range.
1403  MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1404  bool CanReduceSize = false;
1405  MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1406  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1407  MemSetAccess->getDefiningAccess(), MemCpyLoc);
1408  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1409  if (hasUndefContents(MSSA, AA, MemCpy->getSource(), MD, CopySize))
1410  CanReduceSize = true;
1411 
1412  if (!CanReduceSize)
1413  return false;
1414  CopySize = MemSetSize;
1415  }
1416  }
1417 
1418  IRBuilder<> Builder(MemCpy);
1419  Instruction *NewM =
1420  Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1421  CopySize, MaybeAlign(MemCpy->getDestAlignment()));
1422  auto *LastDef =
1423  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1424  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1425  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1426 
1427  return true;
1428 }
1429 
1430 /// Perform simplification of memcpy's. If we have memcpy A
1431 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1432 /// B to be a memcpy from X to Z (or potentially a memmove, depending on
1433 /// circumstances). This allows later passes to remove the first memcpy
1434 /// altogether.
1435 bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1436  // We can only optimize non-volatile memcpy's.
1437  if (M->isVolatile()) return false;
1438 
1439  // If the source and destination of the memcpy are the same, then zap it.
1440  if (M->getSource() == M->getDest()) {
1441  ++BBI;
1442  eraseInstruction(M);
1443  return true;
1444  }
1445 
1446  // If copying from a constant, try to turn the memcpy into a memset.
1447  if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
1448  if (GV->isConstant() && GV->hasDefinitiveInitializer())
1449  if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1450  M->getModule()->getDataLayout())) {
1451  IRBuilder<> Builder(M);
1452  Instruction *NewM =
1453  Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
1454  MaybeAlign(M->getDestAlignment()), false);
1455  auto *LastDef =
1456  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1457  auto *NewAccess =
1458  MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1459  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1460 
1461  eraseInstruction(M);
1462  ++NumCpyToSet;
1463  return true;
1464  }
1465 
1466  MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1467  // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
1468  MemoryAccess *AnyClobber = MA->getDefiningAccess();
1469  MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1470  const MemoryAccess *DestClobber =
1471  MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
1472 
1473  // Try to turn a partially redundant memset + memcpy into
1474  // memcpy + smaller memset. We don't need the memcpy size for this.
1475  // The memcpy must post-dominate the memset, so limit this to the same basic
1476  // block. A non-local generalization is likely not worthwhile.
1477  if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1478  if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1479  if (DestClobber->getBlock() == M->getParent())
1480  if (processMemSetMemCpyDependence(M, MDep))
1481  return true;
1482 
1483  MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1484  AnyClobber, MemoryLocation::getForSource(M));
1485 
1486  // There are four possible optimizations we can do for memcpy:
1487  // a) memcpy-memcpy xform which exposes redundancy for DSE.
1488  // b) call-memcpy xform for return slot optimization.
1489  // c) memcpy from freshly alloca'd space or space that has just started
1490  // its lifetime copies undefined data, and we can therefore eliminate
1491  // the memcpy in favor of the data that was already at the destination.
1492  // d) memcpy from a just-memset'd source can be turned into memset.
1493  if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1494  if (Instruction *MI = MD->getMemoryInst()) {
1495  if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1496  if (auto *C = dyn_cast<CallInst>(MI)) {
1497  // FIXME: Can we pass in either of dest/src alignment here instead
1498  // of conservatively taking the minimum?
1499  Align Alignment = std::min(M->getDestAlign().valueOrOne(),
1500  M->getSourceAlign().valueOrOne());
1501  if (performCallSlotOptzn(
1502  M, M, M->getDest(), M->getSource(),
1503  TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
1504  [C]() -> CallInst * { return C; })) {
1505  LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1506  << " call: " << *C << "\n"
1507  << " memcpy: " << *M << "\n");
1508  eraseInstruction(M);
1509  ++NumMemCpyInstr;
1510  return true;
1511  }
1512  }
1513  }
1514  if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1515  return processMemCpyMemCpyDependence(M, MDep);
1516  if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1517  if (performMemCpyToMemSetOptzn(M, MDep)) {
1518  LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1519  eraseInstruction(M);
1520  ++NumCpyToSet;
1521  return true;
1522  }
1523  }
1524  }
1525 
1526  if (hasUndefContents(MSSA, AA, M->getSource(), MD, M->getLength())) {
1527  LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1528  eraseInstruction(M);
1529  ++NumMemCpyInstr;
1530  return true;
1531  }
1532  }
1533 
1534  return false;
1535 }
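// Sketch of case (a) above, the memcpy-memcpy forwarding handled by
// processMemCpyMemCpyDependence (pseudo notation, hypothetical operands):
//   memcpy(Y <- X, n);
//   memcpy(Z <- Y, n);
// is rewritten so that the second copy reads from X directly:
//   memcpy(Y <- X, n);
//   memcpy(Z <- X, n);    // or a memmove if X and Z may overlap
// which exposes the first copy as removable by a later DSE run once Y has no
// other uses.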
1536 
1537 /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1538 /// not to alias.
1539 bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1540  // See if the source could be modified by this memmove potentially.
1541  if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
1542  return false;
1543 
1544  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1545  << "\n");
1546 
1547  // If not, then we know we can transform this.
1548  Type *ArgTys[3] = { M->getRawDest()->getType(),
1549  M->getRawSource()->getType(),
1550  M->getLength()->getType() };
1551  M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
1552  Intrinsic::memcpy, ArgTys));
1553 
1554  // For MemorySSA nothing really changes (except that memcpy may imply stricter
1555  // aliasing guarantees).
1556 
1557  ++NumMoveToCpy;
1558  return true;
1559 }
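// Sketch of the rewrite above: once the alias query proves the memmove cannot
// read memory it also writes (source and destination do not overlap),
//   memmove(dst <- src, n);
// becomes
//   memcpy(dst <- src, n);
// simply by retargeting the call to the llvm.memcpy declaration; the
// operands, length and alignment are left untouched.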
1560 
1561 /// This is called on every byval argument in call sites.
1562 bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1563  const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1564  // Find out what feeds this byval argument.
1565  Value *ByValArg = CB.getArgOperand(ArgNo);
1566  Type *ByValTy = CB.getParamByValType(ArgNo);
1567  TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1568  MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1569  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1570  if (!CallAccess)
1571  return false;
1572  MemCpyInst *MDep = nullptr;
1573  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1574  CallAccess->getDefiningAccess(), Loc);
1575  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1576  MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1577 
1578  // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1579  // a memcpy, see if we can pass the source of the memcpy as the byval
1580  // argument instead of the copy's result.
1581  if (!MDep || MDep->isVolatile() ||
1582  ByValArg->stripPointerCasts() != MDep->getDest())
1583  return false;
1584 
1585  // The length of the memcpy must be larger than or equal to the size of the byval.
1586  auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1587  if (!C1 || !TypeSize::isKnownGE(
1588  TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1589  return false;
1590 
1591  // Get the alignment of the byval. If the call doesn't specify the alignment,
1592  // then it is some target specific value that we can't know.
1593  MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1594  if (!ByValAlign) return false;
1595 
1596  // If it is greater than the memcpy's source alignment, check whether we can
1597  // force the source of the memcpy to the alignment we need; if not, bail out.
1598  MaybeAlign MemDepAlign = MDep->getSourceAlign();
1599  if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1600  getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1601  DT) < *ByValAlign)
1602  return false;
1603 
1604  // The address space of the memcpy source must match the byval argument
1605  if (MDep->getSource()->getType()->getPointerAddressSpace() !=
1606  ByValArg->getType()->getPointerAddressSpace())
1607  return false;
1608 
1609  // Verify that the copied-from memory doesn't change in between the memcpy and
1610  // the byval call.
1611  // memcpy(a <- b)
1612  // *b = 42;
1613  // foo(*a)
1614  // It would be invalid to transform the second memcpy into foo(*b).
1615  if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep),
1616  MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
1617  return false;
1618 
1619  Value *TmpCast = MDep->getSource();
1620  if (MDep->getSource()->getType() != ByValArg->getType()) {
1621  BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
1622  "tmpcast", &CB);
1623  // Set the tmpcast's DebugLoc to MDep's
1624  TmpBitCast->setDebugLoc(MDep->getDebugLoc());
1625  TmpCast = TmpBitCast;
1626  }
1627 
1628  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1629  << " " << *MDep << "\n"
1630  << " " << CB << "\n");
1631 
1632  // Otherwise we're good! Update the byval argument.
1633  CB.setArgOperand(ArgNo, TmpCast);
1634  ++NumMemCpyInstr;
1635  return true;
1636 }
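// Sketch of the byval forwarding above (pseudo notation, hypothetical callee):
//   memcpy(tmp <- src, n);
//   call foo(byval tmp);
// becomes, when the copy covers the whole byval, the required alignment can
// be satisfied, and src is not written between the copy and the call:
//   memcpy(tmp <- src, n);    // now frequently dead and removable later
//   call foo(byval src);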
1637 
1638 /// Executes one iteration of MemCpyOptPass.
1639 bool MemCpyOptPass::iterateOnFunction(Function &F) {
1640  bool MadeChange = false;
1641 
1642  // Walk all instructions in the function.
1643  for (BasicBlock &BB : F) {
1644  // Skip unreachable blocks. For example processStore assumes that an
1645  // instruction in a BB can't be dominated by a later instruction in the
1646  // same BB (which is a scenario that can happen for an unreachable BB that
1647  // has itself as a predecessor).
1648  if (!DT->isReachableFromEntry(&BB))
1649  continue;
1650 
1651  for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
1652  // Avoid invalidating the iterator.
1653  Instruction *I = &*BI++;
1654 
1655  bool RepeatInstruction = false;
1656 
1657  if (auto *SI = dyn_cast<StoreInst>(I))
1658  MadeChange |= processStore(SI, BI);
1659  else if (auto *M = dyn_cast<MemSetInst>(I))
1660  RepeatInstruction = processMemSet(M, BI);
1661  else if (auto *M = dyn_cast<MemCpyInst>(I))
1662  RepeatInstruction = processMemCpy(M, BI);
1663  else if (auto *M = dyn_cast<MemMoveInst>(I))
1664  RepeatInstruction = processMemMove(M);
1665  else if (auto *CB = dyn_cast<CallBase>(I)) {
1666  for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
1667  if (CB->isByValArgument(i))
1668  MadeChange |= processByValArgument(*CB, i);
1669  }
1670 
1671  // Reprocess the instruction if desired.
1672  if (RepeatInstruction) {
1673  if (BI != BB.begin())
1674  --BI;
1675  MadeChange = true;
1676  }
1677  }
1678  }
1679 
1680  return MadeChange;
1681 }
1682 
1683 PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
1684  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1685  auto *AA = &AM.getResult<AAManager>(F);
1686  auto *AC = &AM.getResult<AssumptionAnalysis>(F);
1687  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1688  auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
1689 
1690  bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
1691  if (!MadeChange)
1692  return PreservedAnalyses::all();
1693 
1694  PreservedAnalyses PA;
1695  PA.preserveSet<CFGAnalyses>();
1696  PA.preserve<MemorySSAAnalysis>();
1697  return PA;
1698 }
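// The new-pass-manager entry point above is what runs when the pass is named
// in an opt pipeline, e.g. (hypothetical input file):
//   opt -passes=memcpyopt input.ll -S -o optimized.ll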
1699 
1700 bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
1701  AliasAnalysis *AA_, AssumptionCache *AC_,
1702  DominatorTree *DT_, MemorySSA *MSSA_) {
1703  bool MadeChange = false;
1704  TLI = TLI_;
1705  AA = AA_;
1706  AC = AC_;
1707  DT = DT_;
1708  MSSA = MSSA_;
1709  MemorySSAUpdater MSSAU_(MSSA_);
1710  MSSAU = &MSSAU_;
1711 
1712  while (true) {
1713  if (!iterateOnFunction(F))
1714  break;
1715  MadeChange = true;
1716  }
1717 
1718  if (VerifyMemorySSA)
1719  MSSA_->verifyMemorySSA();
1720 
1721  return MadeChange;
1722 }
1723 
1724 /// This is the main transformation entry point for a function.
1725 bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
1726  if (skipFunction(F))
1727  return false;
1728 
1729  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1730  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1731  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1732  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1733  auto *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
1734 
1735  return Impl.runImpl(F, TLI, AA, AC, DT, MSSA);
1736 }