1 //===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs various transformations related to eliminating memcpy
10 // calls, or transforming sets of stores into memset's.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
15 #include "llvm/ADT/DenseSet.h"
16 #include "llvm/ADT/None.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/ADT/iterator_range.h"
21 #include "llvm/Analysis/AliasAnalysis.h"
22 #include "llvm/Analysis/AssumptionCache.h"
23 #include "llvm/Analysis/CaptureTracking.h"
24 #include "llvm/Analysis/GlobalsModRef.h"
25 #include "llvm/Analysis/Loads.h"
26 #include "llvm/Analysis/MemoryLocation.h"
27 #include "llvm/Analysis/MemorySSA.h"
28 #include "llvm/Analysis/MemorySSAUpdater.h"
29 #include "llvm/Analysis/TargetLibraryInfo.h"
30 #include "llvm/Analysis/ValueTracking.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DataLayout.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/Dominators.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/GlobalVariable.h"
38 #include "llvm/IR/IRBuilder.h"
39 #include "llvm/IR/InstrTypes.h"
40 #include "llvm/IR/Instruction.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/Intrinsics.h"
44 #include "llvm/IR/LLVMContext.h"
45 #include "llvm/IR/Module.h"
46 #include "llvm/IR/PassManager.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/User.h"
49 #include "llvm/IR/Value.h"
50 #include "llvm/InitializePasses.h"
51 #include "llvm/Pass.h"
52 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/Debug.h"
54 #include "llvm/Support/MathExtras.h"
55 #include "llvm/Support/raw_ostream.h"
56 #include "llvm/Transforms/Scalar.h"
57 #include "llvm/Transforms/Utils/Local.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstdint>
61 
62 using namespace llvm;
63 
64 #define DEBUG_TYPE "memcpyopt"
65 
66 static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
67     "enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden,
68     cl::desc("Enable memcpyopt even when libcalls are disabled"));
70 
71 STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
72 STATISTIC(NumMemSetInfer, "Number of memsets inferred");
73 STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
74 STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
75 STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
76 
77 namespace {
78 
79 /// Represents a range of memset'd bytes with the ByteVal value.
80 /// This allows us to analyze stores like:
81 /// store 0 -> P+1
82 /// store 0 -> P+0
83 /// store 0 -> P+3
84 /// store 0 -> P+2
85 /// which sometimes happens with stores to arrays of structs etc. When we see
86 /// the first store, we make a range [1, 2). The second store extends the range
87 /// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
88 /// two ranges into [0, 3) which is memset'able.
89 struct MemsetRange {
90  // Start/End - A semi range that describes the span that this range covers.
91  // The range is closed at the start and open at the end: [Start, End).
92  int64_t Start, End;
93 
94  /// StartPtr - The getelementptr instruction that points to the start of the
95  /// range.
96  Value *StartPtr;
97 
98  /// Alignment - The known alignment of the first store.
99  unsigned Alignment;
100 
101  /// TheStores - The actual stores that make up this range.
102  SmallVector<Instruction *, 16> TheStores;
103 
104  bool isProfitableToUseMemset(const DataLayout &DL) const;
105 };
106 
107 } // end anonymous namespace
108 
109 bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
110  // If we found more than 4 stores to merge or 16 bytes, use memset.
111  if (TheStores.size() >= 4 || End-Start >= 16) return true;
112 
113  // If there is nothing to merge, don't do anything.
114  if (TheStores.size() < 2) return false;
115 
116  // If any of the stores are a memset, then it is always good to extend the
117  // memset.
118  for (Instruction *SI : TheStores)
119  if (!isa<StoreInst>(SI))
120  return true;
121 
122  // Assume that the code generator is capable of merging pairs of stores
123  // together if it wants to.
124  if (TheStores.size() == 2) return false;
125 
126  // If we have fewer than 8 stores, it can still be worthwhile to do this.
127  // For example, merging 4 i8 stores into an i32 store is useful almost always.
128  // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
129  // memset will be split into 2 32-bit stores anyway) and doing so can
130  // pessimize the llvm optimizer.
131  //
132  // Since we don't have perfect knowledge here, make some assumptions: assume
133  // the maximum GPR width is the same size as the largest legal integer
134  // size. If so, check to see whether we will end up actually reducing the
135  // number of stores used.
136  unsigned Bytes = unsigned(End-Start);
137  unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
138  if (MaxIntSize == 0)
139  MaxIntSize = 1;
140  unsigned NumPointerStores = Bytes / MaxIntSize;
141 
142  // Assume the remaining bytes if any are done a byte at a time.
143  unsigned NumByteStores = Bytes % MaxIntSize;
144 
145  // If we will reduce the # stores (according to this heuristic), do the
146  // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
147  // etc.
148  return TheStores.size() > NumPointerStores+NumByteStores;
149 }
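// Note (added for exposition, not part of the upstream source): a brief worked
// example of the heuristic above. Assume DL.getLargestLegalIntTypeSizeInBits()
// is 32, so MaxIntSize == 4. Three adjacent i16 stores cover Bytes == 6, giving
// NumPointerStores == 6 / 4 == 1 and NumByteStores == 6 % 4 == 2; since
// 3 > 1 + 2 is false, the range is not turned into a memset. Four or more
// stores are instead accepted early by the TheStores.size() >= 4 check.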
150 
151 namespace {
152 
153 class MemsetRanges {
154  using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
155 
156  /// A sorted list of the memset ranges.
157  SmallVector<MemsetRange, 8> Ranges;
158 
159  const DataLayout &DL;
160 
161 public:
162  MemsetRanges(const DataLayout &DL) : DL(DL) {}
163 
164  using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
165 
166  const_iterator begin() const { return Ranges.begin(); }
167  const_iterator end() const { return Ranges.end(); }
168  bool empty() const { return Ranges.empty(); }
169 
170  void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
171  if (auto *SI = dyn_cast<StoreInst>(Inst))
172  addStore(OffsetFromFirst, SI);
173  else
174  addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
175  }
176 
177  void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
178  TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
179  assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
180  addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(),
181  SI->getAlign().value(), SI);
182  }
183 
184  void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
185  int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
186  addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI);
187  }
188 
189  void addRange(int64_t Start, int64_t Size, Value *Ptr,
190  unsigned Alignment, Instruction *Inst);
191 };
192 
193 } // end anonymous namespace
194 
195 /// Add a new store to the MemsetRanges data structure. This adds a
196 /// new range for the specified store at the specified offset, merging into
197 /// existing ranges as appropriate.
198 void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
199  unsigned Alignment, Instruction *Inst) {
200  int64_t End = Start+Size;
201 
202  range_iterator I = partition_point(
203  Ranges, [=](const MemsetRange &O) { return O.End < Start; });
204 
205  // We now know that I == E, in which case we didn't find anything to merge
206  // with, or that Start <= I->End. If End < I->Start or I == E, then we need
207  // to insert a new range. Handle this now.
208  if (I == Ranges.end() || End < I->Start) {
209  MemsetRange &R = *Ranges.insert(I, MemsetRange());
210  R.Start = Start;
211  R.End = End;
212  R.StartPtr = Ptr;
213  R.Alignment = Alignment;
214  R.TheStores.push_back(Inst);
215  return;
216  }
217 
218  // This store overlaps with I, add it.
219  I->TheStores.push_back(Inst);
220 
221  // At this point, we may have an interval that completely contains our store.
222  // If so, just add it to the interval and return.
223  if (I->Start <= Start && I->End >= End)
224  return;
225 
226  // Now we know that Start <= I->End and End >= I->Start so the range overlaps
227  // but is not entirely contained within the range.
228 
229  // See if this store extends the start of the range. In this case, it couldn't
230  // possibly cause it to join the prior range, because otherwise we would have
231  // stopped on *it*.
232  if (Start < I->Start) {
233  I->Start = Start;
234  I->StartPtr = Ptr;
235  I->Alignment = Alignment;
236  }
237 
238  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
239  // is in or right at the end of I), and that End >= I->Start. Extend I out to
240  // End.
241  if (End > I->End) {
242  I->End = End;
243  range_iterator NextI = I;
244  while (++NextI != Ranges.end() && End >= NextI->Start) {
245  // Merge the range in.
246  I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
247  if (NextI->End > I->End)
248  I->End = NextI->End;
249  Ranges.erase(NextI);
250  NextI = I;
251  }
252  }
253 }
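// Illustrative example (added, not from the upstream source): with existing
// ranges [0, 4) and [8, 12), a call addRange(4, 4, ...) first grows the first
// range to [0, 8); the trailing loop then sees End >= NextI->Start (8 >= 8),
// folds the second range in, and leaves a single memset'able range [0, 12).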
254 
255 //===----------------------------------------------------------------------===//
256 // MemCpyOptLegacyPass Pass
257 //===----------------------------------------------------------------------===//
258 
259 namespace {
260 
261 class MemCpyOptLegacyPass : public FunctionPass {
262  MemCpyOptPass Impl;
263 
264 public:
265  static char ID; // Pass identification, replacement for typeid
266 
267  MemCpyOptLegacyPass() : FunctionPass(ID) {
268    initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
269  }
270 
271  bool runOnFunction(Function &F) override;
272 
273 private:
274  // This transformation requires dominator postdominator info
275  void getAnalysisUsage(AnalysisUsage &AU) const override {
276    AU.setPreservesCFG();
277    AU.addRequired<AssumptionCacheTracker>();
278    AU.addRequired<DominatorTreeWrapperPass>();
279    AU.addPreserved<DominatorTreeWrapperPass>();
280    AU.addPreserved<GlobalsAAWrapperPass>();
281    AU.addRequired<TargetLibraryInfoWrapperPass>();
282    AU.addRequired<AAResultsWrapperPass>();
283    AU.addPreserved<AAResultsWrapperPass>();
284    AU.addRequired<MemorySSAWrapperPass>();
285    AU.addPreserved<MemorySSAWrapperPass>();
286  }
287 };
288 
289 } // end anonymous namespace
290 
291 char MemCpyOptLegacyPass::ID = 0;
292 
293 /// The public interface to this file...
294 FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
295 
296 INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
297                       false, false)
298 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
299 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
300 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
301 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
302 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
303 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
304 INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
305                     false, false)
306 
307 // Check that V is either not accessible by the caller, or unwinding cannot
308 // occur between Start and End.
309 static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
310                                          Instruction *End) {
311  assert(Start->getParent() == End->getParent() && "Must be in same block");
312  // Function can't unwind, so it also can't be visible through unwinding.
313  if (Start->getFunction()->doesNotThrow())
314  return false;
315 
316  // Object is not visible on unwind.
317  // TODO: Support RequiresNoCaptureBeforeUnwind case.
318  bool RequiresNoCaptureBeforeUnwind;
319  if (isNotVisibleOnUnwind(getUnderlyingObject(V),
320                           RequiresNoCaptureBeforeUnwind) &&
321  !RequiresNoCaptureBeforeUnwind)
322  return false;
323 
324  // Check whether there are any unwinding instructions in the range.
325  return any_of(make_range(Start->getIterator(), End->getIterator()),
326  [](const Instruction &I) { return I.mayThrow(); });
327 }
328 
329 void MemCpyOptPass::eraseInstruction(Instruction *I) {
330  MSSAU->removeMemoryAccess(I);
331  I->eraseFromParent();
332 }
333 
334 // Check for mod or ref of Loc between Start and End, excluding both boundaries.
335 // Start and End must be in the same block.
336 static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
337                             const MemoryUseOrDef *Start,
338  const MemoryUseOrDef *End) {
339  assert(Start->getBlock() == End->getBlock() && "Only local supported");
340  for (const MemoryAccess &MA :
341  make_range(++Start->getIterator(), End->getIterator())) {
342  if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(),
343  Loc)))
344  return true;
345  }
346  return false;
347 }
348 
349 // Check for mod of Loc between Start and End, excluding both boundaries.
350 // Start and End can be in different blocks.
351 static bool writtenBetween(MemorySSA *MSSA, AliasAnalysis &AA,
352                            MemoryLocation Loc, const MemoryUseOrDef *Start,
353  const MemoryUseOrDef *End) {
354  if (isa<MemoryUse>(End)) {
355  // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
356  // Manually check read accesses between Start and End, if they are in the
357  // same block, for clobbers. Otherwise assume Loc is clobbered.
358  return Start->getBlock() != End->getBlock() ||
359  any_of(
360  make_range(std::next(Start->getIterator()), End->getIterator()),
361  [&AA, Loc](const MemoryAccess &Acc) {
362  if (isa<MemoryUse>(&Acc))
363  return false;
364  Instruction *AccInst =
365  cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
366  return isModSet(AA.getModRefInfo(AccInst, Loc));
367  });
368  }
369 
370  // TODO: Only walk until we hit Start.
371  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
372      End->getDefiningAccess(), Loc);
373  return !MSSA->dominates(Clobber, Start);
374 }
375 
376 /// When scanning forward over instructions, we look for some other patterns to
377 /// fold away. In particular, this looks for stores to neighboring locations of
378 /// memory. If it sees enough consecutive ones, it attempts to merge them
379 /// together into a memcpy/memset.
380 Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
381  Value *StartPtr,
382  Value *ByteVal) {
383  const DataLayout &DL = StartInst->getModule()->getDataLayout();
384 
385  // We can't track scalable types
386  if (auto *SI = dyn_cast<StoreInst>(StartInst))
387  if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
388  return nullptr;
389 
390  // Okay, so we now have a single splattable store. Scan to find
391  // all subsequent stores of the same value to offset from the same pointer.
392  // Join these together into ranges, so we can decide whether contiguous blocks
393  // are stored.
394  MemsetRanges Ranges(DL);
395 
396  BasicBlock::iterator BI(StartInst);
397 
398  // Keeps track of the last memory use or def before the insertion point for
399  // the new memset. The new MemoryDef for the inserted memsets will be inserted
400  // after MemInsertPoint. It points to either LastMemDef or to the last user
401  // before the insertion point of the memset, if there are any such users.
402  MemoryUseOrDef *MemInsertPoint = nullptr;
403  // Keeps track of the last MemoryDef between StartInst and the insertion point
404  // for the new memset. This will become the defining access of the inserted
405  // memsets.
406  MemoryDef *LastMemDef = nullptr;
407  for (++BI; !BI->isTerminator(); ++BI) {
408  auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
409  MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
410  if (CurrentAcc) {
411  MemInsertPoint = CurrentAcc;
412  if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
413  LastMemDef = CurrentDef;
414  }
415 
416  // Calls that only access inaccessible memory do not block merging
417  // accessible stores.
418  if (auto *CB = dyn_cast<CallBase>(BI)) {
419  if (CB->onlyAccessesInaccessibleMemory())
420  continue;
421  }
422 
423  if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
424  // If the instruction is readnone, ignore it, otherwise bail out. We
425  // don't even allow readonly here because we don't want something like:
426  // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
427  if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
428  break;
429  continue;
430  }
431 
432  if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
433  // If this is a store, see if we can merge it in.
434  if (!NextStore->isSimple()) break;
435 
436  Value *StoredVal = NextStore->getValueOperand();
437 
438  // Don't convert stores of non-integral pointer types to memsets (which
439  // stores integers).
440  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
441  break;
442 
443  // We can't track ranges involving scalable types.
444  if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
445  break;
446 
447  // Check to see if this stored value is of the same byte-splattable value.
448  Value *StoredByte = isBytewiseValue(StoredVal, DL);
449  if (isa<UndefValue>(ByteVal) && StoredByte)
450  ByteVal = StoredByte;
451  if (ByteVal != StoredByte)
452  break;
453 
454      // Check to see if this store is to a constant offset from the start ptr.
455      Optional<int64_t> Offset =
456          isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL);
457  if (!Offset)
458  break;
459 
460  Ranges.addStore(*Offset, NextStore);
461  } else {
462  auto *MSI = cast<MemSetInst>(BI);
463 
464  if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
465  !isa<ConstantInt>(MSI->getLength()))
466  break;
467 
468  // Check to see if this store is to a constant offset from the start ptr.
469  Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), DL);
470  if (!Offset)
471  break;
472 
473  Ranges.addMemSet(*Offset, MSI);
474  }
475  }
476 
477  // If we have no ranges, then we just had a single store with nothing that
478  // could be merged in. This is a very common case of course.
479  if (Ranges.empty())
480  return nullptr;
481 
482  // If we had at least one store that could be merged in, add the starting
483  // store as well. We try to avoid this unless there is at least something
484  // interesting as a small compile-time optimization.
485  Ranges.addInst(0, StartInst);
486 
487  // If we create any memsets, we put it right before the first instruction that
488  // isn't part of the memset block. This ensures that the memset is dominated
489  // by any addressing instruction needed by the start of the block.
490  IRBuilder<> Builder(&*BI);
491 
492  // Now that we have full information about ranges, loop over the ranges and
493  // emit memset's for anything big enough to be worthwhile.
494  Instruction *AMemSet = nullptr;
495  for (const MemsetRange &Range : Ranges) {
496  if (Range.TheStores.size() == 1) continue;
497 
498  // If it is profitable to lower this range to memset, do so now.
499  if (!Range.isProfitableToUseMemset(DL))
500  continue;
501 
502  // Otherwise, we do want to transform this! Create a new memset.
503  // Get the starting pointer of the block.
504  StartPtr = Range.StartPtr;
505 
506  AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
507  MaybeAlign(Range.Alignment));
508  LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
509  : Range.TheStores) dbgs()
510  << *SI << '\n';
511  dbgs() << "With: " << *AMemSet << '\n');
512  if (!Range.TheStores.empty())
513  AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
514 
515  assert(LastMemDef && MemInsertPoint &&
516  "Both LastMemDef and MemInsertPoint need to be set");
517  auto *NewDef =
518  cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
519  ? MSSAU->createMemoryAccessBefore(
520  AMemSet, LastMemDef, MemInsertPoint)
521  : MSSAU->createMemoryAccessAfter(
522  AMemSet, LastMemDef, MemInsertPoint));
523  MSSAU->insertDef(NewDef, /*RenameUses=*/true);
524  LastMemDef = NewDef;
525  MemInsertPoint = NewDef;
526 
527  // Zap all the stores.
528  for (Instruction *SI : Range.TheStores)
529  eraseInstruction(SI);
530 
531  ++NumMemSetInfer;
532  }
533 
534  return AMemSet;
535 }
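// Illustrative example of the overall transformation (added, not from the
// upstream source): four adjacent byte stores such as
//   A[0] = 0; A[1] = 0; A[2] = 0; A[3] = 0;
// are collected into one range and replaced with
//   memset(A, 0, 4);
// inserted just before the first instruction that is not part of the block of
// mergeable stores.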
536 
537 // This method tries to lift a store instruction before position P.
538 // It will lift the store and its arguments, plus anything that
539 // may alias with these.
540 // The method returns true if it was successful.
541 bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
542  // If the store aliases this position, bail out early.
543  MemoryLocation StoreLoc = MemoryLocation::get(SI);
544  if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
545  return false;
546 
547  // Keep track of the arguments of all instructions we plan to lift
548  // so we can make sure to lift them as well if appropriate.
549  SmallPtrSet<Instruction *, 8> Args;
550  if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()))
551  if (Ptr->getParent() == SI->getParent())
552  Args.insert(Ptr);
553 
554  // Instructions to lift before P.
555  SmallVector<Instruction *, 8> ToLift;
556 
557  // Memory locations of lifted instructions.
558  SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
559 
560  // Lifted calls.
561  SmallVector<const CallBase *, 8> Calls;
562 
563  const MemoryLocation LoadLoc = MemoryLocation::get(LI);
564 
565  for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
566  auto *C = &*I;
567 
568    // Make sure hoisting does not perform a store that was not guaranteed to
569    // happen.
570    if (!isGuaranteedToTransferExecutionToSuccessor(C))
571      return false;
572 
573  bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None));
574 
575  bool NeedLift = false;
576  if (Args.erase(C))
577  NeedLift = true;
578  else if (MayAlias) {
579  NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
580  return isModOrRefSet(AA->getModRefInfo(C, ML));
581  });
582 
583  if (!NeedLift)
584  NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
585  return isModOrRefSet(AA->getModRefInfo(C, Call));
586  });
587  }
588 
589  if (!NeedLift)
590  continue;
591 
592  if (MayAlias) {
593  // Since LI is implicitly moved downwards past the lifted instructions,
594  // none of them may modify its source.
595  if (isModSet(AA->getModRefInfo(C, LoadLoc)))
596  return false;
597  else if (const auto *Call = dyn_cast<CallBase>(C)) {
598  // If we can't lift this before P, it's game over.
599  if (isModOrRefSet(AA->getModRefInfo(P, Call)))
600  return false;
601 
602  Calls.push_back(Call);
603  } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
604  // If we can't lift this before P, it's game over.
605  auto ML = MemoryLocation::get(C);
606  if (isModOrRefSet(AA->getModRefInfo(P, ML)))
607  return false;
608 
609  MemLocs.push_back(ML);
610  } else
611  // We don't know how to lift this instruction.
612  return false;
613  }
614 
615  ToLift.push_back(C);
616  for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k)
617  if (auto *A = dyn_cast<Instruction>(C->getOperand(k))) {
618  if (A->getParent() == SI->getParent()) {
619  // Cannot hoist user of P above P
620        if (A == P) return false;
621  Args.insert(A);
622  }
623  }
624  }
625 
626  // Find MSSA insertion point. Normally P will always have a corresponding
627  // memory access before which we can insert. However, with non-standard AA
628  // pipelines, there may be a mismatch between AA and MSSA, in which case we
629  // will scan for a memory access before P. In either case, we know for sure
630  // that at least the load will have a memory access.
631  // TODO: Simplify this once P will be determined by MSSA, in which case the
632  // discrepancy can no longer occur.
633  MemoryUseOrDef *MemInsertPoint = nullptr;
634  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
635  MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
636  } else {
637  const Instruction *ConstP = P;
638  for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
639  ++LI->getReverseIterator())) {
640  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
641  MemInsertPoint = MA;
642  break;
643  }
644  }
645  }
646 
647  // We made it, we need to lift.
648  for (auto *I : llvm::reverse(ToLift)) {
649  LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
650  I->moveBefore(P);
651  assert(MemInsertPoint && "Must have found insert point");
652  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
653  MSSAU->moveAfter(MA, MemInsertPoint);
654  MemInsertPoint = MA;
655  }
656  }
657 
658  return true;
659 }
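// Illustrative example (added, not from the upstream source; names are
// hypothetical): given
//   %v = load i32, ptr %src          ; LI
//   call void @clobber(ptr %src)     ; P, may write %src
//   %p = getelementptr i8, ptr %A, i64 4
//   store i32 %v, ptr %p             ; SI
// the getelementptr feeding the store is lifted above the call, so that the
// load/store pair can then be promoted to a memcpy emitted at P instead of at
// the store's original position.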
660 
661 bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
662  if (!SI->isSimple()) return false;
663 
664  // Avoid merging nontemporal stores since the resulting
665  // memcpy/memset would not be able to preserve the nontemporal hint.
666  // In theory we could teach how to propagate the !nontemporal metadata to
667  // memset calls. However, that change would force the backend to
668  // conservatively expand !nontemporal memset calls back to sequences of
669  // store instructions (effectively undoing the merging).
670  if (SI->getMetadata(LLVMContext::MD_nontemporal))
671  return false;
672 
673  const DataLayout &DL = SI->getModule()->getDataLayout();
674 
675  Value *StoredVal = SI->getValueOperand();
676 
677  // Not all the transforms below are correct for non-integral pointers, bail
678  // until we've audited the individual pieces.
679  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
680  return false;
681 
682  // Load to store forwarding can be interpreted as memcpy.
683  if (auto *LI = dyn_cast<LoadInst>(StoredVal)) {
684  if (LI->isSimple() && LI->hasOneUse() &&
685  LI->getParent() == SI->getParent()) {
686 
687  auto *T = LI->getType();
688  // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
689  // the corresponding libcalls are not available.
690  // TODO: We should really distinguish between libcall availability and
691  // our ability to introduce intrinsics.
692      if (T->isAggregateType() &&
693          (EnableMemCpyOptWithoutLibcalls ||
694           (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
695  MemoryLocation LoadLoc = MemoryLocation::get(LI);
696 
697  // We use alias analysis to check if an instruction may store to
698  // the memory we load from in between the load and the store. If
699  // such an instruction is found, we try to promote there instead
700  // of at the store position.
701  // TODO: Can use MSSA for this.
702  Instruction *P = SI;
703  for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
704  if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
705  P = &I;
706  break;
707  }
708  }
709 
710  // We found an instruction that may write to the loaded memory.
711  // We can try to promote at this position instead of the store
712  // position if nothing aliases the store memory after this and the store
713  // destination is not in the range.
714  if (P && P != SI) {
715  if (!moveUp(SI, P, LI))
716  P = nullptr;
717  }
718 
719  // If a valid insertion position is found, then we can promote
720  // the load/store pair to a memcpy.
721  if (P) {
722  // If we load from memory that may alias the memory we store to,
723        // memmove must be used to preserve semantics. If not, memcpy can
724  // be used. Also, if we load from constant memory, memcpy can be used
725  // as the constant memory won't be modified.
726  bool UseMemMove = false;
727  if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
728  UseMemMove = true;
729 
730  uint64_t Size = DL.getTypeStoreSize(T);
731 
732        IRBuilder<> Builder(P);
733        Instruction *M;
734  if (UseMemMove)
735  M = Builder.CreateMemMove(
736  SI->getPointerOperand(), SI->getAlign(),
737  LI->getPointerOperand(), LI->getAlign(), Size);
738  else
739  M = Builder.CreateMemCpy(
740  SI->getPointerOperand(), SI->getAlign(),
741  LI->getPointerOperand(), LI->getAlign(), Size);
742 
743  LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
744  << *M << "\n");
745 
746  auto *LastDef =
747  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
748  auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
749  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
750 
751  eraseInstruction(SI);
752  eraseInstruction(LI);
753  ++NumMemCpyInstr;
754 
755  // Make sure we do not invalidate the iterator.
756  BBI = M->getIterator();
757  return true;
758  }
759  }
760 
761  // Detect cases where we're performing call slot forwarding, but
762  // happen to be using a load-store pair to implement it, rather than
763  // a memcpy.
764  auto GetCall = [&]() -> CallInst * {
765  // We defer this expensive clobber walk until the cheap checks
766  // have been done on the source inside performCallSlotOptzn.
767  if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
768  MSSA->getWalker()->getClobberingMemoryAccess(LI)))
769  return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
770  return nullptr;
771  };
772 
773  bool changed = performCallSlotOptzn(
774          LI, SI, SI->getPointerOperand()->stripPointerCasts(),
775          LI->getPointerOperand()->stripPointerCasts(),
776          DL.getTypeStoreSize(SI->getOperand(0)->getType()),
777  commonAlignment(SI->getAlign(), LI->getAlign()), GetCall);
778  if (changed) {
779  eraseInstruction(SI);
780  eraseInstruction(LI);
781  ++NumMemCpyInstr;
782  return true;
783  }
784  }
785  }
786 
787  // The following code creates memset intrinsics out of thin air. Don't do
788  // this if the corresponding libfunc is not available.
789  // TODO: We should really distinguish between libcall availability and
790  // our ability to introduce intrinsics.
791  if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
792  return false;
793 
794  // There are two cases that are interesting for this code to handle: memcpy
795  // and memset. Right now we only handle memset.
796 
797  // Ensure that the value being stored is splatable as a single byte, like
798  // "0" or "-1" of any width, as well as things like 0xA0A0A0A0 and 0.0.
800  auto *V = SI->getOperand(0);
801  if (Value *ByteVal = isBytewiseValue(V, DL)) {
802  if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
803  ByteVal)) {
804  BBI = I->getIterator(); // Don't invalidate iterator.
805  return true;
806  }
807 
808  // If we have an aggregate, we try to promote it to memset regardless
809  // of opportunity for merging as it can expose optimization opportunities
810  // in subsequent passes.
811  auto *T = V->getType();
812  if (T->isAggregateType()) {
813      uint64_t Size = DL.getTypeStoreSize(T);
814      IRBuilder<> Builder(SI);
815      auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
816  SI->getAlign());
817 
818  LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
819 
820  // The newly inserted memset is immediately overwritten by the original
821  // store, so we do not need to rename uses.
822  auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
823  auto *NewAccess = MSSAU->createMemoryAccessBefore(
824  M, StoreDef->getDefiningAccess(), StoreDef);
825  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
826 
827  eraseInstruction(SI);
828  NumMemSetInfer++;
829 
830  // Make sure we do not invalidate the iterator.
831  BBI = M->getIterator();
832  return true;
833  }
834  }
835 
836  return false;
837 }
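// Illustrative example (added, not from the upstream source): a block-local
// load/store pair copying an aggregate, e.g.
//   %tmp = load %struct.S, ptr %src
//   store %struct.S %tmp, ptr %dst
// is promoted by the code above to
//   llvm.memcpy(%dst, %src, sizeof(%struct.S))
// or to llvm.memmove when %src and %dst may alias.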
838 
839 bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
840  // See if there is another memset or store neighboring this memset which
841  // allows us to widen out the memset to do a single larger store.
842  if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
843  if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
844  MSI->getValue())) {
845  BBI = I->getIterator(); // Don't invalidate iterator.
846  return true;
847  }
848  return false;
849 }
850 
851 /// Takes a memcpy and a call that it depends on,
852 /// and checks for the possibility of a call slot optimization by having
853 /// the call write its result directly into the destination of the memcpy.
854 bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
855  Instruction *cpyStore, Value *cpyDest,
856  Value *cpySrc, TypeSize cpySize,
857  Align cpyAlign,
858  std::function<CallInst *()> GetC) {
859  // The general transformation to keep in mind is
860  //
861  // call @func(..., src, ...)
862  // memcpy(dest, src, ...)
863  //
864  // ->
865  //
866  // memcpy(dest, src, ...)
867  // call @func(..., dest, ...)
868  //
869  // Since moving the memcpy is technically awkward, we additionally check that
870  // src only holds uninitialized values at the moment of the call, meaning that
871  // the memcpy can be discarded rather than moved.
872 
873  // We can't optimize scalable types.
874  if (cpySize.isScalable())
875  return false;
876 
877  // Require that src be an alloca. This simplifies the reasoning considerably.
878  auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
879  if (!srcAlloca)
880  return false;
881 
882  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
883  if (!srcArraySize)
884  return false;
885 
886  const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
887  uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
888  srcArraySize->getZExtValue();
889 
890  if (cpySize < srcSize)
891  return false;
892 
893  CallInst *C = GetC();
894  if (!C)
895  return false;
896 
897  // Lifetime marks shouldn't be operated on.
898  if (Function *F = C->getCalledFunction())
899  if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
900  return false;
901 
902 
903  if (C->getParent() != cpyStore->getParent()) {
904  LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
905  return false;
906  }
907 
908  MemoryLocation DestLoc = isa<StoreInst>(cpyStore) ?
909  MemoryLocation::get(cpyStore) :
910  MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
911 
912  // Check that nothing touches the dest of the copy between
913  // the call and the store/memcpy.
914  if (accessedBetween(*AA, DestLoc, MSSA->getMemoryAccess(C),
915  MSSA->getMemoryAccess(cpyStore))) {
916  LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
917  return false;
918  }
919 
920  // Check that accessing the first srcSize bytes of dest will not cause a
921  // trap. Otherwise the transform is invalid since it might cause a trap
922  // to occur earlier than it otherwise would.
923  if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
924  DL, C, DT)) {
925  LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
926  return false;
927  }
928 
929 
930  // Make sure that nothing can observe cpyDest being written early. There are
931  // a number of cases to consider:
932  // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
933  // the transform.
934  // 2. C itself may not access cpyDest (prior to the transform). This is
935  // checked further below.
936  // 3. If cpyDest is accessible to the caller of this function (potentially
937  // captured and not based on an alloca), we need to ensure that we cannot
938  // unwind between C and cpyStore. This is checked here.
939  // 4. If cpyDest is potentially captured, there may be accesses to it from
940  // another thread. In this case, we need to check that cpyStore is
941  // guaranteed to be executed if C is. As it is a non-atomic access, it
942  // renders accesses from other threads undefined.
943  // TODO: This is currently not checked.
944  if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
945  LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding");
946  return false;
947  }
948 
949  // Check that dest points to memory that is at least as aligned as src.
950  Align srcAlign = srcAlloca->getAlign();
951  bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
952  // If dest is not aligned enough and we can't increase its alignment then
953  // bail out.
954  if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
955  return false;
956 
957  // Check that src is not accessed except via the call and the memcpy. This
958  // guarantees that it holds only undefined values when passed in (so the final
959  // memcpy can be dropped), that it is not read or written between the call and
960  // the memcpy, and that writing beyond the end of it is undefined.
961  SmallVector<User *, 8> srcUseList(srcAlloca->users());
962  while (!srcUseList.empty()) {
963  User *U = srcUseList.pop_back_val();
964 
965  if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
966  append_range(srcUseList, U->users());
967  continue;
968  }
969  if (const auto *G = dyn_cast<GetElementPtrInst>(U)) {
970  if (!G->hasAllZeroIndices())
971  return false;
972 
973  append_range(srcUseList, U->users());
974  continue;
975  }
976  if (const auto *IT = dyn_cast<IntrinsicInst>(U))
977  if (IT->isLifetimeStartOrEnd())
978  continue;
979 
980  if (U != C && U != cpyLoad)
981  return false;
982  }
983 
984  // Check whether src is captured by the called function, in which case there
985  // may be further indirect uses of src.
986  bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
987  return U->stripPointerCasts() == cpySrc &&
988  !C->doesNotCapture(C->getArgOperandNo(&U));
989  });
990 
991  // If src is captured, then check whether there are any potential uses of
992  // src through the captured pointer before the lifetime of src ends, either
993  // due to a lifetime.end or a return from the function.
994  if (SrcIsCaptured) {
995  // Check that dest is not captured before/at the call. We have already
996  // checked that src is not captured before it. If either had been captured,
997  // then the call might be comparing the argument against the captured dest
998  // or src pointer.
999  Value *DestObj = getUnderlyingObject(cpyDest);
1000  if (!isIdentifiedFunctionLocal(DestObj) ||
1001  PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
1002  /* StoreCaptures */ true, C, DT,
1003  /* IncludeI */ true))
1004  return false;
1005 
1006  MemoryLocation SrcLoc =
1007  MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
1008  for (Instruction &I :
1009  make_range(++C->getIterator(), C->getParent()->end())) {
1010  // Lifetime of srcAlloca ends at lifetime.end.
1011  if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1012  if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
1013  II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
1014  cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
1015  break;
1016  }
1017 
1018  // Lifetime of srcAlloca ends at return.
1019  if (isa<ReturnInst>(&I))
1020  break;
1021 
1022  // Ignore the direct read of src in the load.
1023  if (&I == cpyLoad)
1024  continue;
1025 
1026  // Check whether this instruction may mod/ref src through the captured
1027      // pointer (we have already checked any direct mod/refs in the loop above).
1028  // Also bail if we hit a terminator, as we don't want to scan into other
1029  // blocks.
1030  if (isModOrRefSet(AA->getModRefInfo(&I, SrcLoc)) || I.isTerminator())
1031  return false;
1032  }
1033  }
1034 
1035  // Since we're changing the parameter to the callsite, we need to make sure
1036  // that what would be the new parameter dominates the callsite.
1037  if (!DT->dominates(cpyDest, C)) {
1038  // Support moving a constant index GEP before the call.
1039  auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1040  if (GEP && GEP->hasAllConstantIndices() &&
1041  DT->dominates(GEP->getPointerOperand(), C))
1042  GEP->moveBefore(C);
1043  else
1044  return false;
1045  }
1046 
1047  // In addition to knowing that the call does not access src in some
1048  // unexpected manner, for example via a global, which we deduce from
1049  // the use analysis, we also need to know that it does not sneakily
1050  // access dest. We rely on AA to figure this out for us.
1051  ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
1052  // If necessary, perform additional analysis.
1053  if (isModOrRefSet(MR))
1054  MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT);
1055  if (isModOrRefSet(MR))
1056  return false;
1057 
1058  // We can't create address space casts here because we don't know if they're
1059  // safe for the target.
1060  if (cpySrc->getType()->getPointerAddressSpace() !=
1061  cpyDest->getType()->getPointerAddressSpace())
1062  return false;
1063  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1064  if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1065  cpySrc->getType()->getPointerAddressSpace() !=
1066  C->getArgOperand(ArgI)->getType()->getPointerAddressSpace())
1067  return false;
1068 
1069  // All the checks have passed, so do the transformation.
1070  bool changedArgument = false;
1071  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1072  if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1073  Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
1074  : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
1075  cpyDest->getName(), C);
1076  changedArgument = true;
1077  if (C->getArgOperand(ArgI)->getType() == Dest->getType())
1078  C->setArgOperand(ArgI, Dest);
1079  else
1080  C->setArgOperand(ArgI, CastInst::CreatePointerCast(
1081  Dest, C->getArgOperand(ArgI)->getType(),
1082  Dest->getName(), C));
1083  }
1084 
1085  if (!changedArgument)
1086  return false;
1087 
1088  // If the destination wasn't sufficiently aligned then increase its alignment.
1089  if (!isDestSufficientlyAligned) {
1090  assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1091  cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1092  }
1093 
1094  // Update AA metadata
1095  // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
1096  // handled here, but combineMetadata doesn't support them yet
1097  unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1098  LLVMContext::MD_noalias,
1099  LLVMContext::MD_invariant_group,
1100  LLVMContext::MD_access_group};
1101  combineMetadata(C, cpyLoad, KnownIDs, true);
1102  if (cpyLoad != cpyStore)
1103  combineMetadata(C, cpyStore, KnownIDs, true);
1104 
1105  ++NumCallSlot;
1106  return true;
1107 }
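// Illustrative example of the call slot optimization (added, not from the
// upstream source; @produce and %T are hypothetical):
//   %tmp = alloca %T
//   call void @produce(ptr sret(%T) %tmp)
//   llvm.memcpy(%dst, %tmp, sizeof(%T))
// The call is rewritten to write into %dst directly:
//   call void @produce(ptr sret(%T) %dst)
// after which the caller deletes the now-redundant memcpy.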
1108 
1109 /// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1110 /// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
1111 bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1112  MemCpyInst *MDep) {
1113  // We can only transform memcpy's where the dest of one is the source of the
1114  // other.
1115  if (M->getSource() != MDep->getDest() || MDep->isVolatile())
1116  return false;
1117 
1118  // If dep instruction is reading from our current input, then it is a noop
1119  // transfer and substituting the input won't change this instruction. Just
1120  // ignore the input and let someone else zap MDep. This handles cases like:
1121  // memcpy(a <- a)
1122  // memcpy(b <- a)
1123  if (M->getSource() == MDep->getSource())
1124  return false;
1125 
1126  // Second, the length of the memcpy's must be the same, or the preceding one
1127  // must be larger than the following one.
1128  if (MDep->getLength() != M->getLength()) {
1129  auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1130  auto *MLen = dyn_cast<ConstantInt>(M->getLength());
1131  if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
1132  return false;
1133  }
1134 
1135  // Verify that the copied-from memory doesn't change in between the two
1136  // transfers. For example, in:
1137  // memcpy(a <- b)
1138  // *b = 42;
1139  // memcpy(c <- a)
1140  // It would be invalid to transform the second memcpy into memcpy(c <- b).
1141  //
1142  // TODO: If the code between M and MDep is transparent to the destination "c",
1143  // then we could still perform the xform by moving M up to the first memcpy.
1144  // TODO: It would be sufficient to check the MDep source up to the memcpy
1145  // size of M, rather than MDep.
1146  if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep),
1147                     MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
1148  return false;
1149 
1150  // If the dest of the second might alias the source of the first, then the
1151  // source and dest might overlap. In addition, if the source of the first
1152  // points to constant memory, they won't overlap by definition. Otherwise, we
1153  // still want to eliminate the intermediate value, but we have to generate a
1154  // memmove instead of memcpy.
1155  bool UseMemMove = false;
1156  if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(MDep))))
1157  UseMemMove = true;
1158 
1159  // If all checks passed, then we can transform M.
1160  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1161  << *MDep << '\n' << *M << '\n');
1162 
1163  // TODO: Is this worth it if we're creating a less aligned memcpy? For
1164  // example we could be moving from movaps -> movq on x86.
1165  IRBuilder<> Builder(M);
1166  Instruction *NewM;
1167  if (UseMemMove)
1168  NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
1169  MDep->getRawSource(), MDep->getSourceAlign(),
1170  M->getLength(), M->isVolatile());
1171  else if (isa<MemCpyInlineInst>(M)) {
1172  // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1173  // never allowed since that would allow the latter to be lowered as a call
1174  // to an external function.
1175  NewM = Builder.CreateMemCpyInline(
1176  M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
1177  MDep->getSourceAlign(), M->getLength(), M->isVolatile());
1178  } else
1179  NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
1180  MDep->getRawSource(), MDep->getSourceAlign(),
1181  M->getLength(), M->isVolatile());
1182 
1183  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
1184  auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1185  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1186  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1187 
1188  // Remove the instruction we're replacing.
1189  eraseInstruction(M);
1190  ++NumMemCpyInstr;
1191  return true;
1192 }
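// Illustrative example (added, not from the upstream source):
//   memcpy(b <- a, 16)
//   memcpy(c <- b, 16)
// becomes
//   memcpy(b <- a, 16)
//   memcpy(c <- a, 16)
// leaving the first memcpy dead if b has no other users, so DSE can remove it.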
1193 
1194 /// We've found that the (upward scanning) memory dependence of \p MemCpy is
1195 /// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1196 /// weren't copied over by \p MemCpy.
1197 ///
1198 /// In other words, transform:
1199 /// \code
1200 /// memset(dst, c, dst_size);
1201 /// memcpy(dst, src, src_size);
1202 /// \endcode
1203 /// into:
1204 /// \code
1205 /// memcpy(dst, src, src_size);
1206 /// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1207 /// \endcode
1208 bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1209  MemSetInst *MemSet) {
1210  // We can only transform memset/memcpy with the same destination.
1211  if (!AA->isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1212  return false;
1213 
1214  // Check that src and dst of the memcpy aren't the same. While memcpy
1215  // operands cannot partially overlap, exact equality is allowed.
1216  if (isModSet(AA->getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1217  return false;
1218 
1219  // We know that dst up to src_size is not written. We now need to make sure
1220  // that dst up to dst_size is not accessed. (If we did not move the memset,
1221  // checking for reads would be sufficient.)
1222  if (accessedBetween(*AA, MemoryLocation::getForDest(MemCpy),
1223                      MSSA->getMemoryAccess(MemSet),
1224  MSSA->getMemoryAccess(MemCpy)))
1225  return false;
1226 
1227  // Use the same i8* dest as the memcpy, killing the memset dest if different.
1228  Value *Dest = MemCpy->getRawDest();
1229  Value *DestSize = MemSet->getLength();
1230  Value *SrcSize = MemCpy->getLength();
1231 
1232  if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1233  return false;
1234 
1235  // If the sizes are the same, simply drop the memset instead of generating
1236  // a replacement with zero size.
1237  if (DestSize == SrcSize) {
1238  eraseInstruction(MemSet);
1239  return true;
1240  }
1241 
1242  // By default, create an unaligned memset.
1243  unsigned Align = 1;
1244  // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1245  // of the sum.
1246  const unsigned DestAlign =
1247  std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment());
1248  if (DestAlign > 1)
1249  if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1250  Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
1251 
1252  IRBuilder<> Builder(MemCpy);
1253 
1254  // If the sizes have different types, zext the smaller one.
1255  if (DestSize->getType() != SrcSize->getType()) {
1256  if (DestSize->getType()->getIntegerBitWidth() >
1257  SrcSize->getType()->getIntegerBitWidth())
1258  SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1259  else
1260  DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1261  }
1262 
1263  Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1264  Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1265  Value *MemsetLen = Builder.CreateSelect(
1266  Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1267  unsigned DestAS = Dest->getType()->getPointerAddressSpace();
1268  Instruction *NewMemSet = Builder.CreateMemSet(
1269  Builder.CreateGEP(Builder.getInt8Ty(),
1270  Builder.CreatePointerCast(Dest,
1271  Builder.getInt8PtrTy(DestAS)),
1272  SrcSize),
1273  MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
1274 
1275  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
1276  "MemCpy must be a MemoryDef");
1277  // The new memset is inserted after the memcpy, but it is known that its
1278  // defining access is the memset about to be removed which immediately
1279  // precedes the memcpy.
1280  auto *LastDef =
1281  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1282  auto *NewAccess = MSSAU->createMemoryAccessBefore(
1283  NewMemSet, LastDef->getDefiningAccess(), LastDef);
1284  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1285 
1286  eraseInstruction(MemSet);
1287  return true;
1288 }
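// Illustrative example with concrete sizes (added, not from the upstream
// source):
//   memset(dst, 0, 16)
//   memcpy(dst, src, 8)
// becomes
//   memcpy(dst, src, 8)
//   memset(dst + 8, 0, 8)
// so only the 8 trailing bytes not overwritten by the memcpy are still zeroed.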
1289 
1290 /// Determine whether the instruction has undefined content for the given Size,
1291 /// either because it was freshly alloca'd or started its lifetime.
1292 static bool hasUndefContents(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
1293                              MemoryDef *Def, Value *Size) {
1294  if (MSSA->isLiveOnEntryDef(Def))
1295  return isa<AllocaInst>(getUnderlyingObject(V));
1296 
1297  if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1298  if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1299  auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1300 
1301  if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1302  if (AA->isMustAlias(V, II->getArgOperand(1)) &&
1303  LTSize->getZExtValue() >= CSize->getZExtValue())
1304  return true;
1305  }
1306 
1307  // If the lifetime.start covers a whole alloca (as it almost always
1308  // does) and we're querying a pointer based on that alloca, then we know
1309  // the memory is definitely undef, regardless of how exactly we alias.
1310  // The size also doesn't matter, as an out-of-bounds access would be UB.
1311  if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
1312  if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1313  const DataLayout &DL = Alloca->getModule()->getDataLayout();
1314  if (Optional<TypeSize> AllocaSize =
1315  Alloca->getAllocationSizeInBits(DL))
1316  if (*AllocaSize == LTSize->getValue() * 8)
1317  return true;
1318  }
1319  }
1320  }
1321  }
1322 
1323  return false;
1324 }
1325 
1326 /// Transform memcpy to memset when its source was just memset.
1327 /// In other words, turn:
1328 /// \code
1329 /// memset(dst1, c, dst1_size);
1330 /// memcpy(dst2, dst1, dst2_size);
1331 /// \endcode
1332 /// into:
1333 /// \code
1334 /// memset(dst1, c, dst1_size);
1335 /// memset(dst2, c, dst2_size);
1336 /// \endcode
1337 /// When dst2_size <= dst1_size.
1338 bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1339  MemSetInst *MemSet) {
1340  // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
1341  // memcpying from the same address. Otherwise it is hard to reason about.
1342  if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1343  return false;
1344 
1345  Value *MemSetSize = MemSet->getLength();
1346  Value *CopySize = MemCpy->getLength();
1347 
1348  if (MemSetSize != CopySize) {
1349  // Make sure the memcpy doesn't read any more than what the memset wrote.
1350  // Don't worry about sizes larger than i64.
1351 
1352  // A known memset size is required.
1353  auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1354  if (!CMemSetSize)
1355  return false;
1356 
1357  // A known memcpy size is also required.
1358  auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1359  if (!CCopySize)
1360  return false;
1361  if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1362  // If the memcpy is larger than the memset, but the memory was undef prior
1363  // to the memset, we can just ignore the tail. Technically we're only
1364  // interested in the bytes from MemSetSize..CopySize here, but as we can't
1365  // easily represent this location, we use the full 0..CopySize range.
1366  MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1367  bool CanReduceSize = false;
1368  MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1369  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1370  MemSetAccess->getDefiningAccess(), MemCpyLoc);
1371  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1372  if (hasUndefContents(MSSA, AA, MemCpy->getSource(), MD, CopySize))
1373  CanReduceSize = true;
1374 
1375  if (!CanReduceSize)
1376  return false;
1377  CopySize = MemSetSize;
1378  }
1379  }
1380 
1381  IRBuilder<> Builder(MemCpy);
1382  Instruction *NewM =
1383  Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1384  CopySize, MaybeAlign(MemCpy->getDestAlignment()));
1385  auto *LastDef =
1386  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1387  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1388  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1389 
1390  return true;
1391 }
1392 
1393 /// Perform simplification of memcpy's. If we have memcpy A
1394 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1395 /// B to be a memcpy from X to Z (or potentially a memmove, depending on
1396 /// circumstances). This allows later passes to remove the first memcpy
1397 /// altogether.
1398 bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1399  // We can only optimize non-volatile memcpy's.
1400  if (M->isVolatile()) return false;
1401 
1402  // If the source and destination of the memcpy are the same, then zap it.
1403  if (M->getSource() == M->getDest()) {
1404  ++BBI;
1405  eraseInstruction(M);
1406  return true;
1407  }
1408 
1409  // If copying from a constant, try to turn the memcpy into a memset.
1410  if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
1411  if (GV->isConstant() && GV->hasDefinitiveInitializer())
1412  if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1413  M->getModule()->getDataLayout())) {
1414  IRBuilder<> Builder(M);
1415  Instruction *NewM =
1416  Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
1417  MaybeAlign(M->getDestAlignment()), false);
1418  auto *LastDef =
1419  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1420  auto *NewAccess =
1421  MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1422  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1423 
1424  eraseInstruction(M);
1425  ++NumCpyToSet;
1426  return true;
1427  }
1428 
1429  MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1430  // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
1431  MemoryAccess *AnyClobber = MA->getDefiningAccess();
1432  MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1433  const MemoryAccess *DestClobber =
1434  MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
1435 
1436  // Try to turn a partially redundant memset + memcpy into
1437  // memcpy + smaller memset. We don't need the memcpy size for this.
1438  // The memcpy must post-dominate the memset, so limit this to the same basic
1439  // block. A non-local generalization is likely not worthwhile.
1440  if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1441  if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1442  if (DestClobber->getBlock() == M->getParent())
1443  if (processMemSetMemCpyDependence(M, MDep))
1444  return true;
1445 
1446  MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1447  AnyClobber, MemoryLocation::getForSource(M));
1448 
1449  // There are four possible optimizations we can do for memcpy:
1450  // a) memcpy-memcpy xform which exposes redundancy for DSE.
1451  // b) call-memcpy xform for return slot optimization.
1452  // c) memcpy from freshly alloca'd space or space that has just started
1453  // its lifetime copies undefined data, and we can therefore eliminate
1454  // the memcpy in favor of the data that was already at the destination.
1455  // d) memcpy from a just-memset'd source can be turned into memset.
1456  if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1457  if (Instruction *MI = MD->getMemoryInst()) {
1458  if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1459  if (auto *C = dyn_cast<CallInst>(MI)) {
1460  // FIXME: Can we pass in either of dest/src alignment here instead
1461  // of conservatively taking the minimum?
1462  Align Alignment = std::min(M->getDestAlign().valueOrOne(),
1463  M->getSourceAlign().valueOrOne());
1464  if (performCallSlotOptzn(
1465  M, M, M->getDest(), M->getSource(),
1466  TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
1467  [C]() -> CallInst * { return C; })) {
1468  LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1469  << " call: " << *C << "\n"
1470  << " memcpy: " << *M << "\n");
1471  eraseInstruction(M);
1472  ++NumMemCpyInstr;
1473  return true;
1474  }
1475  }
1476  }
1477  if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1478  return processMemCpyMemCpyDependence(M, MDep);
1479  if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1480  if (performMemCpyToMemSetOptzn(M, MDep)) {
1481  LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1482  eraseInstruction(M);
1483  ++NumCpyToSet;
1484  return true;
1485  }
1486  }
1487  }
1488 
1489  if (hasUndefContents(MSSA, AA, M->getSource(), MD, M->getLength())) {
1490  LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1491  eraseInstruction(M);
1492  ++NumMemCpyInstr;
1493  return true;
1494  }
1495  }
1496 
1497  return false;
1498 }
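// Illustrative sketch (not from the upstream sources): the memcpy-from-
// constant-global case handled at the top of processMemCpy corresponds to IR
// roughly like the following, assuming a zero-initialized constant @g:
//
//   @g = private constant [8 x i8] zeroinitializer
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr @g, i64 8, i1 false)
//
// isBytewiseValue() recognizes the initializer as a repeated byte, so the
// copy can be rewritten to:
//
//   call void @llvm.memset.p0.i64(ptr %dst, i8 0, i64 8, i1 false)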
1499 
1500 /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1501 /// not to alias.
1502 bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1503  // See if the source could potentially be modified by this memmove.
1504  if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
1505  return false;
1506 
1507  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1508  << "\n");
1509 
1510  // If not, then we know we can transform this.
1511  Type *ArgTys[3] = { M->getRawDest()->getType(),
1512  M->getRawSource()->getType(),
1513  M->getLength()->getType() };
1514  M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
1515  Intrinsic::memcpy, ArgTys));
1516 
1517  // For MemorySSA nothing really changes (except that memcpy may imply stricter
1518  // aliasing guarantees).
1519 
1520  ++NumMoveToCpy;
1521  return true;
1522 }
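// Illustrative sketch (not from the upstream sources): when alias analysis
// proves the memmove cannot modify its own source, e.g. for two distinct
// allocas, the intrinsic is rewritten in place:
//
//   call void @llvm.memmove.p0.p0.i64(ptr %dst, ptr %src, i64 16, i1 false)
//   ; becomes
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 16, i1 false)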
1523 
1524 /// This is called on every byval argument in call sites.
1525 bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1526  const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1527  // Find out what feeds this byval argument.
1528  Value *ByValArg = CB.getArgOperand(ArgNo);
1529  Type *ByValTy = CB.getParamByValType(ArgNo);
1530  TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1531  MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1532  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1533  if (!CallAccess)
1534  return false;
1535  MemCpyInst *MDep = nullptr;
1536  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1537  CallAccess->getDefiningAccess(), Loc);
1538  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1539  MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1540 
1541  // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1542  // a memcpy, see if we can pass the memcpy's source as the byval argument
1543  // instead of the copy.
1544  if (!MDep || MDep->isVolatile() ||
1545  ByValArg->stripPointerCasts() != MDep->getDest())
1546  return false;
1547 
1548  // The length of the memcpy must be greater than or equal to the size of the byval.
1549  auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1550  if (!C1 || !TypeSize::isKnownGE(
1551  TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1552  return false;
1553 
1554  // Get the alignment of the byval. If the call doesn't specify the alignment,
1555  // then it is some target-specific value that we can't know.
1556  MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1557  if (!ByValAlign) return false;
1558 
1559  // If it is greater than the memcpy's source alignment, then we check to see if we
1560  // can force the source of the memcpy to the alignment we need. If we fail, we bail out.
1561  MaybeAlign MemDepAlign = MDep->getSourceAlign();
1562  if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1563  getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1564  DT) < *ByValAlign)
1565  return false;
1566 
1567  // The address space of the memcpy source must match that of the byval argument.
1568  if (MDep->getSource()->getType()->getPointerAddressSpace() !=
1569  ByValArg->getType()->getPointerAddressSpace())
1570  return false;
1571 
1572  // Verify that the copied-from memory doesn't change in between the memcpy and
1573  // the byval call.
1574  // memcpy(a <- b)
1575  // *b = 42;
1576  // foo(*a)
1577  // It would be invalid to forward the byval argument here, i.e. to call foo(*b).
1578  if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep),
1579  MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
1580  return false;
1581 
1582  Value *TmpCast = MDep->getSource();
1583  if (MDep->getSource()->getType() != ByValArg->getType()) {
1584  BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
1585  "tmpcast", &CB);
1586  // Set the tmpcast's DebugLoc to MDep's
1587  TmpBitCast->setDebugLoc(MDep->getDebugLoc());
1588  TmpCast = TmpBitCast;
1589  }
1590 
1591  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1592  << " " << *MDep << "\n"
1593  << " " << CB << "\n");
1594 
1595  // Otherwise we're good! Update the byval argument.
1596  CB.setArgOperand(ArgNo, TmpCast);
1597  ++NumMemCpyInstr;
1598  return true;
1599 }
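// Illustrative sketch (not from the upstream sources): the byval forwarding
// above takes a copy-then-call sequence such as
//
//   call void @llvm.memcpy.p0.p0.i64(ptr %tmp, ptr %src, i64 64, i1 false)
//   call void @foo(ptr byval(%struct.S) align 8 %tmp)
//
// and, provided %src is not written between the two instructions and is
// sufficiently aligned, redirects the call to read from the original source
// (the now-dead copy is left for later passes to clean up):
//
//   call void @foo(ptr byval(%struct.S) align 8 %src)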
1600 
1601 /// Executes one iteration of MemCpyOptPass.
1602 bool MemCpyOptPass::iterateOnFunction(Function &F) {
1603  bool MadeChange = false;
1604 
1605  // Walk all instructions in the function.
1606  for (BasicBlock &BB : F) {
1607  // Skip unreachable blocks. For example processStore assumes that an
1608  // instruction in a BB can't be dominated by a later instruction in the
1609  // same BB (which is a scenario that can happen for an unreachable BB that
1610  // has itself as a predecessor).
1611  if (!DT->isReachableFromEntry(&BB))
1612  continue;
1613 
1614  for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
1615  // Avoid invalidating the iterator.
1616  Instruction *I = &*BI++;
1617 
1618  bool RepeatInstruction = false;
1619 
1620  if (auto *SI = dyn_cast<StoreInst>(I))
1621  MadeChange |= processStore(SI, BI);
1622  else if (auto *M = dyn_cast<MemSetInst>(I))
1623  RepeatInstruction = processMemSet(M, BI);
1624  else if (auto *M = dyn_cast<MemCpyInst>(I))
1625  RepeatInstruction = processMemCpy(M, BI);
1626  else if (auto *M = dyn_cast<MemMoveInst>(I))
1627  RepeatInstruction = processMemMove(M);
1628  else if (auto *CB = dyn_cast<CallBase>(I)) {
1629  for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
1630  if (CB->isByValArgument(i))
1631  MadeChange |= processByValArgument(*CB, i);
1632  }
1633 
1634  // Reprocess the instruction if desired.
1635  if (RepeatInstruction) {
1636  if (BI != BB.begin())
1637  --BI;
1638  MadeChange = true;
1639  }
1640  }
1641  }
1642 
1643  return MadeChange;
1644 }
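// Illustrative sketch (not from the upstream sources): the RepeatInstruction
// logic above re-visits an intrinsic that was just rewritten. For example,
// given
//
//   call void @llvm.memcpy.p0.p0.i64(ptr %b, ptr %a, i64 32, i1 false)
//   call void @llvm.memcpy.p0.p0.i64(ptr %c, ptr %b, i64 32, i1 false)
//
// the second copy can first be rewritten to read from %a; decrementing BI
// then lets the new instruction be processed again in case further
// simplifications (e.g. a copy from undef or from a constant) are exposed.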
1645 
1646 PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
1647  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1648  auto *AA = &AM.getResult<AAManager>(F);
1649  auto *AC = &AM.getResult<AssumptionAnalysis>(F);
1650  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1651  auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
1652 
1653  bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
1654  if (!MadeChange)
1655  return PreservedAnalyses::all();
1656 
1657  PreservedAnalyses PA;
1658  PA.preserveSet<CFGAnalyses>();
1659  PA.preserve<MemorySSAAnalysis>();
1660  return PA;
1661 }
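// Minimal usage sketch (not from the upstream sources): a standalone tool can
// schedule this pass through the new pass manager roughly as follows, where F
// is an llvm::Function from an already-parsed module and the analysis
// managers are wired up via PassBuilder:
//
//   PassBuilder PB;
//   LoopAnalysisManager LAM;
//   FunctionAnalysisManager FAM;
//   CGSCCAnalysisManager CGAM;
//   ModuleAnalysisManager MAM;
//   PB.registerModuleAnalyses(MAM);
//   PB.registerCGSCCAnalyses(CGAM);
//   PB.registerFunctionAnalyses(FAM);
//   PB.registerLoopAnalyses(LAM);
//   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
//   FunctionPassManager FPM;
//   FPM.addPass(MemCpyOptPass());
//   FPM.run(F, FAM);
//
// From the command line, `opt -passes=memcpyopt` runs the same pass.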
1662 
1663 bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
1664  AliasAnalysis *AA_, AssumptionCache *AC_,
1665  DominatorTree *DT_, MemorySSA *MSSA_) {
1666  bool MadeChange = false;
1667  TLI = TLI_;
1668  AA = AA_;
1669  AC = AC_;
1670  DT = DT_;
1671  MSSA = MSSA_;
1672  MemorySSAUpdater MSSAU_(MSSA_);
1673  MSSAU = &MSSAU_;
1674 
1675  while (true) {
1676  if (!iterateOnFunction(F))
1677  break;
1678  MadeChange = true;
1679  }
1680 
1681  if (VerifyMemorySSA)
1682  MSSA_->verifyMemorySSA();
1683 
1684  return MadeChange;
1685 }
1686 
1687 /// This is the main transformation entry point for a function.
1688 bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
1689  if (skipFunction(F))
1690  return false;
1691 
1692  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1693  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1694  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1695  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1696  auto *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
1697 
1698  return Impl.runImpl(F, TLI, AA, AC, DT, MSSA);
1699 }
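// Minimal usage sketch (not from the upstream sources): under the legacy pass
// manager the wrapper above is created via createMemCpyOptPass(), e.g.:
//
//   legacy::FunctionPassManager FPM(&M);   // M is the llvm::Module owning F
//   FPM.add(createMemCpyOptPass());
//   FPM.doInitialization();
//   FPM.run(F);
//   FPM.doFinalization();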