1 //===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs various transformations related to eliminating memcpy
10 // calls, or transforming sets of stores into memset's.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
15 #include "llvm/ADT/DenseSet.h"
16 #include "llvm/ADT/None.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/ADT/iterator_range.h"
21 #include "llvm/Analysis/AliasAnalysis.h"
22 #include "llvm/Analysis/AssumptionCache.h"
23 #include "llvm/Analysis/GlobalsModRef.h"
24 #include "llvm/Analysis/Loads.h"
25 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
26 #include "llvm/Analysis/MemoryLocation.h"
27 #include "llvm/Analysis/MemorySSA.h"
28 #include "llvm/Analysis/MemorySSAUpdater.h"
29 #include "llvm/Analysis/TargetLibraryInfo.h"
30 #include "llvm/Analysis/ValueTracking.h"
31 #include "llvm/IR/Argument.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/Dominators.h"
37 #include "llvm/IR/Function.h"
39 #include "llvm/IR/GlobalVariable.h"
40 #include "llvm/IR/IRBuilder.h"
41 #include "llvm/IR/InstrTypes.h"
42 #include "llvm/IR/Instruction.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/IntrinsicInst.h"
45 #include "llvm/IR/Intrinsics.h"
46 #include "llvm/IR/LLVMContext.h"
47 #include "llvm/IR/Module.h"
48 #include "llvm/IR/Operator.h"
49 #include "llvm/IR/PassManager.h"
50 #include "llvm/IR/Type.h"
51 #include "llvm/IR/User.h"
52 #include "llvm/IR/Value.h"
53 #include "llvm/InitializePasses.h"
54 #include "llvm/Pass.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/Debug.h"
57 #include "llvm/Support/MathExtras.h"
58 #include "llvm/Support/raw_ostream.h"
59 #include "llvm/Transforms/Scalar.h"
60 #include "llvm/Transforms/Utils/Local.h"
61 #include <algorithm>
62 #include <cassert>
63 #include <cstdint>
64 #include <utility>
65 
66 using namespace llvm;
67 
68 #define DEBUG_TYPE "memcpyopt"
69 
70 static cl::opt<bool>
71  EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(true), cl::Hidden,
72  cl::desc("Use MemorySSA-backed MemCpyOpt."));
73 
74 STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
75 STATISTIC(NumMemSetInfer, "Number of memsets inferred");
76 STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
77 STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
78 STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
79 
80 namespace {
81 
82 /// Represents a range of memset'd bytes with the ByteVal value.
83 /// This allows us to analyze stores like:
84 /// store 0 -> P+1
85 /// store 0 -> P+0
86 /// store 0 -> P+3
87 /// store 0 -> P+2
88 /// which sometimes happens with stores to arrays of structs etc. When we see
89 /// the first store, we make a range [1, 2). The second store extends the range
90 /// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
91 /// two ranges into [0, 3) which is memset'able.
92 struct MemsetRange {
93  // Start/End - A semi range that describes the span that this range covers.
94  // The range is closed at the start and open at the end: [Start, End).
95  int64_t Start, End;
96 
97  /// StartPtr - The getelementptr instruction that points to the start of the
98  /// range.
99  Value *StartPtr;
100 
101  /// Alignment - The known alignment of the first store.
102  unsigned Alignment;
103 
104  /// TheStores - The actual stores that make up this range.
105  SmallVector<Instruction*, 16> TheStores;
106 
107  bool isProfitableToUseMemset(const DataLayout &DL) const;
108 };
109 
110 } // end anonymous namespace
111 
112 bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
113  // If we found more than 4 stores to merge or 16 bytes, use memset.
114  if (TheStores.size() >= 4 || End-Start >= 16) return true;
115 
116  // If there is nothing to merge, don't do anything.
117  if (TheStores.size() < 2) return false;
118 
119  // If any of the stores are a memset, then it is always good to extend the
120  // memset.
121  for (Instruction *SI : TheStores)
122  if (!isa<StoreInst>(SI))
123  return true;
124 
125  // Assume that the code generator is capable of merging pairs of stores
126  // together if it wants to.
127  if (TheStores.size() == 2) return false;
128 
129  // If we have fewer than 8 stores, it can still be worthwhile to do this.
130  // For example, merging 4 i8 stores into an i32 store is useful almost always.
131  // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
132  // memset will be split into 2 32-bit stores anyway) and doing so can
133  // pessimize the llvm optimizer.
134  //
135  // Since we don't have perfect knowledge here, make some assumptions: assume
136  // the maximum GPR width is the same size as the largest legal integer
137  // size. If so, check to see whether we will end up actually reducing the
138  // number of stores used.
139  unsigned Bytes = unsigned(End-Start);
140  unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
141  if (MaxIntSize == 0)
142  MaxIntSize = 1;
143  unsigned NumPointerStores = Bytes / MaxIntSize;
144 
145  // Assume the remaining bytes if any are done a byte at a time.
146  unsigned NumByteStores = Bytes % MaxIntSize;
147 
148  // If we will reduce the # stores (according to this heuristic), do the
149  // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
150  // etc.
151  return TheStores.size() > NumPointerStores+NumByteStores;
152 }
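// Worked example of the heuristic above (illustrative; not part of the
// original source): with a 64-bit largest legal integer type, three stores
// covering 8 contiguous bytes give NumPointerStores = 1 and NumByteStores = 0,
// so TheStores.size() == 3 > 1 and the range is worth converting to a memset.
// Three i8 stores covering only 3 bytes give 0 + 3 = 3, which is not smaller
// than the store count, so that range is left alone.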
153 
154 namespace {
155 
156 class MemsetRanges {
157  using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
158 
159  /// A sorted list of the memset ranges.
160  SmallVector<MemsetRange, 8> Ranges;
161 
162  const DataLayout &DL;
163 
164 public:
165  MemsetRanges(const DataLayout &DL) : DL(DL) {}
166 
167  using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
168 
169  const_iterator begin() const { return Ranges.begin(); }
170  const_iterator end() const { return Ranges.end(); }
171  bool empty() const { return Ranges.empty(); }
172 
173  void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
174  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
175  addStore(OffsetFromFirst, SI);
176  else
177  addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
178  }
179 
180  void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
181  int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
182 
183  addRange(OffsetFromFirst, StoreSize, SI->getPointerOperand(),
184  SI->getAlign().value(), SI);
185  }
186 
187  void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
188  int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
189  addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI);
190  }
191 
192  void addRange(int64_t Start, int64_t Size, Value *Ptr,
193  unsigned Alignment, Instruction *Inst);
194 };
195 
196 } // end anonymous namespace
197 
198 /// Add a new store to the MemsetRanges data structure. This adds a
199 /// new range for the specified store at the specified offset, merging into
200 /// existing ranges as appropriate.
201 void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
202  unsigned Alignment, Instruction *Inst) {
203  int64_t End = Start+Size;
204 
205  range_iterator I = partition_point(
206  Ranges, [=](const MemsetRange &O) { return O.End < Start; });
207 
208  // We now know that I == E, in which case we didn't find anything to merge
209  // with, or that Start <= I->End. If End < I->Start or I == E, then we need
210  // to insert a new range. Handle this now.
211  if (I == Ranges.end() || End < I->Start) {
212  MemsetRange &R = *Ranges.insert(I, MemsetRange());
213  R.Start = Start;
214  R.End = End;
215  R.StartPtr = Ptr;
216  R.Alignment = Alignment;
217  R.TheStores.push_back(Inst);
218  return;
219  }
220 
221  // This store overlaps with I, add it.
222  I->TheStores.push_back(Inst);
223 
224  // At this point, we may have an interval that completely contains our store.
225  // If so, just add it to the interval and return.
226  if (I->Start <= Start && I->End >= End)
227  return;
228 
229  // Now we know that Start <= I->End and End >= I->Start so the range overlaps
230  // but is not entirely contained within the range.
231 
232  // See if the range extends the start of the range. In this case, it couldn't
233  // possibly cause it to join the prior range, because otherwise we would have
234  // stopped on *it*.
235  if (Start < I->Start) {
236  I->Start = Start;
237  I->StartPtr = Ptr;
238  I->Alignment = Alignment;
239  }
240 
241  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
242  // is in or right at the end of I), and that End >= I->Start. Extend I out to
243  // End.
244  if (End > I->End) {
245  I->End = End;
246  range_iterator NextI = I;
247  while (++NextI != Ranges.end() && End >= NextI->Start) {
248  // Merge the range in.
249  I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
250  if (NextI->End > I->End)
251  I->End = NextI->End;
252  Ranges.erase(NextI);
253  NextI = I;
254  }
255  }
256 }
257 
258 //===----------------------------------------------------------------------===//
259 // MemCpyOptLegacyPass Pass
260 //===----------------------------------------------------------------------===//
261 
262 namespace {
263 
264 class MemCpyOptLegacyPass : public FunctionPass {
265  MemCpyOptPass Impl;
266 
267 public:
268  static char ID; // Pass identification, replacement for typeid
269 
270  MemCpyOptLegacyPass() : FunctionPass(ID) {
271  initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
272  }
273 
274  bool runOnFunction(Function &F) override;
275 
276 private:
277  // This transformation requires dominator postdominator info
278  void getAnalysisUsage(AnalysisUsage &AU) const override {
279  AU.setPreservesCFG();
280  AU.addRequired<AssumptionCacheTracker>();
281  AU.addRequired<DominatorTreeWrapperPass>();
282  AU.addPreserved<DominatorTreeWrapperPass>();
283  AU.addPreserved<GlobalsAAWrapperPass>();
284  AU.addRequired<TargetLibraryInfoWrapperPass>();
285  if (!EnableMemorySSA)
286  AU.addRequired<MemoryDependenceWrapperPass>();
287  AU.addPreserved<MemoryDependenceWrapperPass>();
288  AU.addRequired<AAResultsWrapperPass>();
289  AU.addPreserved<AAResultsWrapperPass>();
290  if (EnableMemorySSA)
291  AU.addRequired<MemorySSAWrapperPass>();
292  AU.addPreserved<MemorySSAWrapperPass>();
293  }
294 };
295 
296 } // end anonymous namespace
297 
298 char MemCpyOptLegacyPass::ID = 0;
299 
300 /// The public interface to this file...
301 FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
302 
303 INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
304  false, false)
305 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
306 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
307 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
308 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
309 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
310 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
311 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
312 INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
313  false, false)
314 
315 // Check that V is either not accessible by the caller, or unwinding cannot
316 // occur between Start and End.
317 static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
318  Instruction *End) {
319  assert(Start->getParent() == End->getParent() && "Must be in same block");
320  if (!Start->getFunction()->doesNotThrow() &&
321  !isa<AllocaInst>(getUnderlyingObject(V))) {
322  for (const Instruction &I :
323  make_range(Start->getIterator(), End->getIterator())) {
324  if (I.mayThrow())
325  return true;
326  }
327  }
328  return false;
329 }
330 
331 void MemCpyOptPass::eraseInstruction(Instruction *I) {
332  if (MSSAU)
333  MSSAU->removeMemoryAccess(I);
334  if (MD)
335  MD->removeInstruction(I);
336  I->eraseFromParent();
337 }
338 
339 // Check for mod or ref of Loc between Start and End, excluding both boundaries.
340 // Start and End must be in the same block
341 static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
342  const MemoryUseOrDef *Start,
343  const MemoryUseOrDef *End) {
344  assert(Start->getBlock() == End->getBlock() && "Only local supported");
345  for (const MemoryAccess &MA :
346  make_range(++Start->getIterator(), End->getIterator())) {
347  if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(),
348  Loc)))
349  return true;
350  }
351  return false;
352 }
353 
354 // Check for mod of Loc between Start and End, excluding both boundaries.
355 // Start and End can be in different blocks.
356 static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc,
357  const MemoryUseOrDef *Start,
358  const MemoryUseOrDef *End) {
359  // TODO: Only walk until we hit Start.
360  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
361  End->getDefiningAccess(), Loc);
362  return !MSSA->dominates(Clobber, Start);
363 }
364 
365 /// When scanning forward over instructions, we look for some other patterns to
366 /// fold away. In particular, this looks for stores to neighboring locations of
367 /// memory. If it sees enough consecutive ones, it attempts to merge them
368 /// together into a memcpy/memset.
369 Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
370  Value *StartPtr,
371  Value *ByteVal) {
372  const DataLayout &DL = StartInst->getModule()->getDataLayout();
373 
374  // Okay, so we now have a single store that can be splatable. Scan to find
375  // all subsequent stores of the same value at offsets from the same pointer.
376  // Join these together into ranges, so we can decide whether contiguous blocks
377  // are stored.
378  MemsetRanges Ranges(DL);
379 
380  BasicBlock::iterator BI(StartInst);
381 
382  // Keeps track of the last memory use or def before the insertion point for
383  // the new memset. The new MemoryDef for the inserted memsets will be inserted
384  // after MemInsertPoint. It points to either LastMemDef or to the last user
385  // before the insertion point of the memset, if there are any such users.
386  MemoryUseOrDef *MemInsertPoint = nullptr;
387  // Keeps track of the last MemoryDef between StartInst and the insertion point
388  // for the new memset. This will become the defining access of the inserted
389  // memsets.
390  MemoryDef *LastMemDef = nullptr;
391  for (++BI; !BI->isTerminator(); ++BI) {
392  if (MSSAU) {
393  auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
394  MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
395  if (CurrentAcc) {
396  MemInsertPoint = CurrentAcc;
397  if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
398  LastMemDef = CurrentDef;
399  }
400  }
401 
402  // Calls that only access inaccessible memory do not block merging
403  // accessible stores.
404  if (auto *CB = dyn_cast<CallBase>(BI)) {
405  if (CB->onlyAccessesInaccessibleMemory())
406  continue;
407  }
408 
409  if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
410  // If the instruction is readnone, ignore it, otherwise bail out. We
411  // don't even allow readonly here because we don't want something like:
412  // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
413  if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
414  break;
415  continue;
416  }
417 
418  if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
419  // If this is a store, see if we can merge it in.
420  if (!NextStore->isSimple()) break;
421 
422  Value *StoredVal = NextStore->getValueOperand();
423 
424  // Don't convert stores of non-integral pointer types to memsets (which
425  // stores integers).
426  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
427  break;
428 
429  // Check to see if this stored value is of the same byte-splattable value.
430  Value *StoredByte = isBytewiseValue(StoredVal, DL);
431  if (isa<UndefValue>(ByteVal) && StoredByte)
432  ByteVal = StoredByte;
433  if (ByteVal != StoredByte)
434  break;
435 
436  // Check to see if this store is to a constant offset from the start ptr.
437  Optional<int64_t> Offset =
438  isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL);
439  if (!Offset)
440  break;
441 
442  Ranges.addStore(*Offset, NextStore);
443  } else {
444  MemSetInst *MSI = cast<MemSetInst>(BI);
445 
446  if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
447  !isa<ConstantInt>(MSI->getLength()))
448  break;
449 
450  // Check to see if this store is to a constant offset from the start ptr.
451  Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), DL);
452  if (!Offset)
453  break;
454 
455  Ranges.addMemSet(*Offset, MSI);
456  }
457  }
458 
459  // If we have no ranges, then we just had a single store with nothing that
460  // could be merged in. This is a very common case of course.
461  if (Ranges.empty())
462  return nullptr;
463 
464  // If we had at least one store that could be merged in, add the starting
465  // store as well. We try to avoid this unless there is at least something
466  // interesting as a small compile-time optimization.
467  Ranges.addInst(0, StartInst);
468 
469  // If we create any memsets, we put them right before the first instruction
470  // that isn't part of the memset block. This ensures that the memset is
471  // dominated by any addressing instruction needed by the start of the block.
472  IRBuilder<> Builder(&*BI);
473 
474  // Now that we have full information about ranges, loop over the ranges and
475  // emit memset's for anything big enough to be worthwhile.
476  Instruction *AMemSet = nullptr;
477  for (const MemsetRange &Range : Ranges) {
478  if (Range.TheStores.size() == 1) continue;
479 
480  // If it is profitable to lower this range to memset, do so now.
481  if (!Range.isProfitableToUseMemset(DL))
482  continue;
483 
484  // Otherwise, we do want to transform this! Create a new memset.
485  // Get the starting pointer of the block.
486  StartPtr = Range.StartPtr;
487 
488  AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
489  MaybeAlign(Range.Alignment));
490  LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
491  : Range.TheStores) dbgs()
492  << *SI << '\n';
493  dbgs() << "With: " << *AMemSet << '\n');
494  if (!Range.TheStores.empty())
495  AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
496 
497  if (MSSAU) {
498  assert(LastMemDef && MemInsertPoint &&
499  "Both LastMemDef and MemInsertPoint need to be set");
500  auto *NewDef =
501  cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
502  ? MSSAU->createMemoryAccessBefore(
503  AMemSet, LastMemDef, MemInsertPoint)
504  : MSSAU->createMemoryAccessAfter(
505  AMemSet, LastMemDef, MemInsertPoint));
506  MSSAU->insertDef(NewDef, /*RenameUses=*/true);
507  LastMemDef = NewDef;
508  MemInsertPoint = NewDef;
509  }
510 
511  // Zap all the stores.
512  for (Instruction *SI : Range.TheStores)
513  eraseInstruction(SI);
514 
515  ++NumMemSetInfer;
516  }
517 
518  return AMemSet;
519 }
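// A minimal sketch of the rewrite tryMergingIntoMemset performs, with
// hypothetical value names (assuming typed i8* pointers):
//   store i8 0, i8* %p
//   store i8 0, i8* %p1    ; %p1 = %p + 1
//   store i8 0, i8* %p2    ; %p2 = %p + 2
//   store i8 0, i8* %p3    ; %p3 = %p + 3
// becomes a single
//   call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4, i1 false)
// inserted right before the first instruction that is not part of the block.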
520 
521 // This method tries to lift a store instruction before position P.
522 // It will lift the store and its arguments, plus anything else that
523 // may alias with them.
524 // The method returns true if it was successful.
525 bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
526  // If the store aliases this position, bail out early.
527  MemoryLocation StoreLoc = MemoryLocation::get(SI);
528  if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
529  return false;
530 
531  // Keep track of the arguments of all instructions we plan to lift
532  // so we can make sure to lift them as well if appropriate.
533  DenseSet<Instruction *> Args;
534  if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()))
535  if (Ptr->getParent() == SI->getParent())
536  Args.insert(Ptr);
537 
538  // Instructions to lift before P.
539  SmallVector<Instruction *, 8> ToLift;
540 
541  // Memory locations of lifted instructions.
542  SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
543 
544  // Lifted calls.
545  SmallVector<const CallBase *, 8> Calls;
546 
547  const MemoryLocation LoadLoc = MemoryLocation::get(LI);
548 
549  for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
550  auto *C = &*I;
551 
552  // Make sure hoisting does not perform a store that was not guaranteed to
553  // happen.
554  if (!isGuaranteedToTransferExecutionToSuccessor(C))
555  return false;
556 
557  bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None));
558 
559  bool NeedLift = false;
560  if (Args.erase(C))
561  NeedLift = true;
562  else if (MayAlias) {
563  NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
564  return isModOrRefSet(AA->getModRefInfo(C, ML));
565  });
566 
567  if (!NeedLift)
568  NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
569  return isModOrRefSet(AA->getModRefInfo(C, Call));
570  });
571  }
572 
573  if (!NeedLift)
574  continue;
575 
576  if (MayAlias) {
577  // Since LI is implicitly moved downwards past the lifted instructions,
578  // none of them may modify its source.
579  if (isModSet(AA->getModRefInfo(C, LoadLoc)))
580  return false;
581  else if (const auto *Call = dyn_cast<CallBase>(C)) {
582  // If we can't lift this before P, it's game over.
583  if (isModOrRefSet(AA->getModRefInfo(P, Call)))
584  return false;
585 
586  Calls.push_back(Call);
587  } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
588  // If we can't lift this before P, it's game over.
589  auto ML = MemoryLocation::get(C);
590  if (isModOrRefSet(AA->getModRefInfo(P, ML)))
591  return false;
592 
593  MemLocs.push_back(ML);
594  } else
595  // We don't know how to lift this instruction.
596  return false;
597  }
598 
599  ToLift.push_back(C);
600  for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k)
601  if (auto *A = dyn_cast<Instruction>(C->getOperand(k))) {
602  if (A->getParent() == SI->getParent()) {
603  // Cannot hoist user of P above P
604  if(A == P) return false;
605  Args.insert(A);
606  }
607  }
608  }
609 
610  // Find MSSA insertion point. Normally P will always have a corresponding
611  // memory access before which we can insert. However, with non-standard AA
612  // pipelines, there may be a mismatch between AA and MSSA, in which case we
613  // will scan for a memory access before P. In either case, we know for sure
614  // that at least the load will have a memory access.
615  // TODO: Simplify this once P will be determined by MSSA, in which case the
616  // discrepancy can no longer occur.
617  MemoryUseOrDef *MemInsertPoint = nullptr;
618  if (MSSAU) {
619  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
620  MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
621  } else {
622  const Instruction *ConstP = P;
623  for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
624  ++LI->getReverseIterator())) {
625  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
626  MemInsertPoint = MA;
627  break;
628  }
629  }
630  }
631  }
632 
633  // We made it, we need to lift.
634  for (auto *I : llvm::reverse(ToLift)) {
635  LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
636  I->moveBefore(P);
637  if (MSSAU) {
638  assert(MemInsertPoint && "Must have found insert point");
639  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
640  MSSAU->moveAfter(MA, MemInsertPoint);
641  MemInsertPoint = MA;
642  }
643  }
644  }
645 
646  return true;
647 }
648 
649 bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
650  if (!SI->isSimple()) return false;
651 
652  // Avoid merging nontemporal stores since the resulting
653  // memcpy/memset would not be able to preserve the nontemporal hint.
654  // In theory we could teach how to propagate the !nontemporal metadata to
655  // memset calls. However, that change would force the backend to
656  // conservatively expand !nontemporal memset calls back to sequences of
657  // store instructions (effectively undoing the merging).
658  if (SI->getMetadata(LLVMContext::MD_nontemporal))
659  return false;
660 
661  const DataLayout &DL = SI->getModule()->getDataLayout();
662 
663  Value *StoredVal = SI->getValueOperand();
664 
665  // Not all the transforms below are correct for non-integral pointers, bail
666  // until we've audited the individual pieces.
667  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
668  return false;
669 
670  // Load to store forwarding can be interpreted as memcpy.
671  if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
672  if (LI->isSimple() && LI->hasOneUse() &&
673  LI->getParent() == SI->getParent()) {
674 
675  auto *T = LI->getType();
676  if (T->isAggregateType()) {
677  MemoryLocation LoadLoc = MemoryLocation::get(LI);
678 
679  // We use alias analysis to check if an instruction may store to
680  // the memory we load from in between the load and the store. If
681  // such an instruction is found, we try to promote there instead
682  // of at the store position.
683  // TODO: Can use MSSA for this.
684  Instruction *P = SI;
685  for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
686  if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
687  P = &I;
688  break;
689  }
690  }
691 
692  // We found an instruction that may write to the loaded memory.
693  // We can try to promote at this position instead of the store
694  // position if nothing aliases the store memory after this and the store
695  // destination is not in the range.
696  if (P && P != SI) {
697  if (!moveUp(SI, P, LI))
698  P = nullptr;
699  }
700 
701  // If a valid insertion position is found, then we can promote
702  // the load/store pair to a memcpy.
703  if (P) {
704  // If we load from memory that may alias the memory we store to,
705  // memmove must be used to preserve semantic. If not, memcpy can
706  // be used.
707  bool UseMemMove = false;
708  if (!AA->isNoAlias(MemoryLocation::get(SI), LoadLoc))
709  UseMemMove = true;
710 
711  uint64_t Size = DL.getTypeStoreSize(T);
712 
713  IRBuilder<> Builder(P);
714  Instruction *M;
715  if (UseMemMove)
716  M = Builder.CreateMemMove(
717  SI->getPointerOperand(), SI->getAlign(),
718  LI->getPointerOperand(), LI->getAlign(), Size);
719  else
720  M = Builder.CreateMemCpy(
721  SI->getPointerOperand(), SI->getAlign(),
722  LI->getPointerOperand(), LI->getAlign(), Size);
723 
724  LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
725  << *M << "\n");
726 
727  if (MSSAU) {
728  auto *LastDef =
729  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
730  auto *NewAccess =
731  MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
732  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
733  }
734 
735  eraseInstruction(SI);
736  eraseInstruction(LI);
737  ++NumMemCpyInstr;
738 
739  // Make sure we do not invalidate the iterator.
740  BBI = M->getIterator();
741  return true;
742  }
743  }
744 
745  // Detect cases where we're performing call slot forwarding, but
746  // happen to be using a load-store pair to implement it, rather than
747  // a memcpy.
748  CallInst *C = nullptr;
749  if (EnableMemorySSA) {
750  if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
751  MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
752  // The load must post-dom the call. Limit to the same block for now.
753  // TODO: Support non-local call-slot optimization?
754  if (LoadClobber->getBlock() == SI->getParent())
755  C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
756  }
757  } else {
758  MemDepResult ldep = MD->getDependency(LI);
759  if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
760  C = dyn_cast<CallInst>(ldep.getInst());
761  }
762 
763  if (C) {
764  // Check that nothing touches the dest of the "copy" between
765  // the call and the store.
766  MemoryLocation StoreLoc = MemoryLocation::get(SI);
767  if (EnableMemorySSA) {
768  if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
769  MSSA->getMemoryAccess(SI)))
770  C = nullptr;
771  } else {
772  for (BasicBlock::iterator I = --SI->getIterator(),
773  E = C->getIterator();
774  I != E; --I) {
775  if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) {
776  C = nullptr;
777  break;
778  }
779  }
780  }
781  }
782 
783  if (C) {
784  bool changed = performCallSlotOptzn(
785  LI, SI, SI->getPointerOperand()->stripPointerCasts(),
786  LI->getPointerOperand()->stripPointerCasts(),
787  DL.getTypeStoreSize(SI->getOperand(0)->getType()),
788  commonAlignment(SI->getAlign(), LI->getAlign()), C);
789  if (changed) {
790  eraseInstruction(SI);
791  eraseInstruction(LI);
792  ++NumMemCpyInstr;
793  return true;
794  }
795  }
796  }
797  }
798 
799  // There are two cases that are interesting for this code to handle: memcpy
800  // and memset. Right now we only handle memset.
801 
802  // Ensure that the value being stored is something that can be memset a
803  // byte at a time, like "0" or "-1" of any width, as well as things like
804  // 0xA0A0A0A0 and 0.0.
805  auto *V = SI->getOperand(0);
806  if (Value *ByteVal = isBytewiseValue(V, DL)) {
807  if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
808  ByteVal)) {
809  BBI = I->getIterator(); // Don't invalidate iterator.
810  return true;
811  }
812 
813  // If we have an aggregate, we try to promote it to memset regardless
814  // of opportunity for merging as it can expose optimization opportunities
815  // in subsequent passes.
816  auto *T = V->getType();
817  if (T->isAggregateType()) {
818  uint64_t Size = DL.getTypeStoreSize(T);
819  IRBuilder<> Builder(SI);
820  auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
821  SI->getAlign());
822 
823  LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
824 
825  if (MSSAU) {
826  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)));
827  auto *LastDef =
828  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
829  auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
830  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
831  }
832 
833  eraseInstruction(SI);
834  NumMemSetInfer++;
835 
836  // Make sure we do not invalidate the iterator.
837  BBI = M->getIterator();
838  return true;
839  }
840  }
841 
842  return false;
843 }
844 
845 bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
846  // See if there is another memset or store neighboring this memset which
847  // allows us to widen out the memset to do a single larger store.
848  if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
849  if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
850  MSI->getValue())) {
851  BBI = I->getIterator(); // Don't invalidate iterator.
852  return true;
853  }
854  return false;
855 }
856 
857 /// Takes a memcpy and a call that it depends on,
858 /// and checks for the possibility of a call slot optimization by having
859 /// the call write its result directly into the destination of the memcpy.
860 bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
861  Instruction *cpyStore, Value *cpyDest,
862  Value *cpySrc, uint64_t cpyLen,
863  Align cpyAlign, CallInst *C) {
864  // The general transformation to keep in mind is
865  //
866  // call @func(..., src, ...)
867  // memcpy(dest, src, ...)
868  //
869  // ->
870  //
871  // memcpy(dest, src, ...)
872  // call @func(..., dest, ...)
873  //
874  // Since moving the memcpy is technically awkward, we additionally check that
875  // src only holds uninitialized values at the moment of the call, meaning that
876  // the memcpy can be discarded rather than moved.
877 
878  // Lifetime marks shouldn't be operated on.
879  if (Function *F = C->getCalledFunction())
880  if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
881  return false;
882 
883  // Require that src be an alloca. This simplifies the reasoning considerably.
884  AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
885  if (!srcAlloca)
886  return false;
887 
888  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
889  if (!srcArraySize)
890  return false;
891 
892  const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
893  uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
894  srcArraySize->getZExtValue();
895 
896  if (cpyLen < srcSize)
897  return false;
898 
899  // Check that accessing the first srcSize bytes of dest will not cause a
900  // trap. Otherwise the transform is invalid since it might cause a trap
901  // to occur earlier than it otherwise would.
902  if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen),
903  DL, C, DT))
904  return false;
905 
906  // Make sure that nothing can observe cpyDest being written early. There are
907  // a number of cases to consider:
908  // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
909  // the transform.
910  // 2. C itself may not access cpyDest (prior to the transform). This is
911  // checked further below.
912  // 3. If cpyDest is accessible to the caller of this function (potentially
913  // captured and not based on an alloca), we need to ensure that we cannot
914  // unwind between C and cpyStore. This is checked here.
915  // 4. If cpyDest is potentially captured, there may be accesses to it from
916  // another thread. In this case, we need to check that cpyStore is
917  // guaranteed to be executed if C is. As it is a non-atomic access, it
918  // renders accesses from other threads undefined.
919  // TODO: This is currently not checked.
920  if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore))
921  return false;
922 
923  // Check that dest points to memory that is at least as aligned as src.
924  Align srcAlign = srcAlloca->getAlign();
925  bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
926  // If dest is not aligned enough and we can't increase its alignment then
927  // bail out.
928  if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
929  return false;
930 
931  // Check that src is not accessed except via the call and the memcpy. This
932  // guarantees that it holds only undefined values when passed in (so the final
933  // memcpy can be dropped), that it is not read or written between the call and
934  // the memcpy, and that writing beyond the end of it is undefined.
935  SmallVector<User *, 8> srcUseList(srcAlloca->users());
936  while (!srcUseList.empty()) {
937  User *U = srcUseList.pop_back_val();
938 
939  if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
940  append_range(srcUseList, U->users());
941  continue;
942  }
943  if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
944  if (!G->hasAllZeroIndices())
945  return false;
946 
947  append_range(srcUseList, U->users());
948  continue;
949  }
950  if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
951  if (IT->isLifetimeStartOrEnd())
952  continue;
953 
954  if (U != C && U != cpyLoad)
955  return false;
956  }
957 
958  // Check that src isn't captured by the called function since the
959  // transformation can cause aliasing issues in that case.
960  for (unsigned ArgI = 0, E = C->arg_size(); ArgI != E; ++ArgI)
961  if (C->getArgOperand(ArgI) == cpySrc && !C->doesNotCapture(ArgI))
962  return false;
963 
964  // Since we're changing the parameter to the callsite, we need to make sure
965  // that what would be the new parameter dominates the callsite.
966  if (!DT->dominates(cpyDest, C)) {
967  // Support moving a constant index GEP before the call.
968  auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
969  if (GEP && GEP->hasAllConstantIndices() &&
970  DT->dominates(GEP->getPointerOperand(), C))
971  GEP->moveBefore(C);
972  else
973  return false;
974  }
975 
976  // In addition to knowing that the call does not access src in some
977  // unexpected manner, for example via a global, which we deduce from
978  // the use analysis, we also need to know that it does not sneakily
979  // access dest. We rely on AA to figure this out for us.
980  ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
981  // If necessary, perform additional analysis.
982  if (isModOrRefSet(MR))
983  MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT);
984  if (isModOrRefSet(MR))
985  return false;
986 
987  // We can't create address space casts here because we don't know if they're
988  // safe for the target.
989  if (cpySrc->getType()->getPointerAddressSpace() !=
990  cpyDest->getType()->getPointerAddressSpace())
991  return false;
992  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
993  if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
994  cpySrc->getType()->getPointerAddressSpace() !=
995  C->getArgOperand(ArgI)->getType()->getPointerAddressSpace())
996  return false;
997 
998  // All the checks have passed, so do the transformation.
999  bool changedArgument = false;
1000  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1001  if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1002  Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
1003  : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
1004  cpyDest->getName(), C);
1005  changedArgument = true;
1006  if (C->getArgOperand(ArgI)->getType() == Dest->getType())
1007  C->setArgOperand(ArgI, Dest);
1008  else
1009  C->setArgOperand(ArgI, CastInst::CreatePointerCast(
1010  Dest, C->getArgOperand(ArgI)->getType(),
1011  Dest->getName(), C));
1012  }
1013 
1014  if (!changedArgument)
1015  return false;
1016 
1017  // If the destination wasn't sufficiently aligned then increase its alignment.
1018  if (!isDestSufficientlyAligned) {
1019  assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1020  cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1021  }
1022 
1023  // Drop any cached information about the call, because we may have changed
1024  // its dependence information by changing its parameter.
1025  if (MD)
1026  MD->removeInstruction(C);
1027 
1028  // Update AA metadata
1029  // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
1030  // handled here, but combineMetadata doesn't support them yet
1031  unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1032  LLVMContext::MD_noalias,
1033  LLVMContext::MD_invariant_group,
1034  LLVMContext::MD_access_group};
1035  combineMetadata(C, cpyLoad, KnownIDs, true);
1036 
1037  ++NumCallSlot;
1038  return true;
1039 }
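// Source-level intuition for the call slot optimization above (an illustrative
// sketch only; names are hypothetical):
//   struct S tmp;  f(&tmp);  memcpy(dst, &tmp, sizeof(struct S));
// becomes
//   f(dst);
// once the checks above (undefined source contents, no unwind visibility of
// dest, sufficient alignment, no capture, and dominance of dest) all hold.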
1040 
1041 /// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1042 /// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
1043 bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1044  MemCpyInst *MDep) {
1045  // We can only transforms memcpy's where the dest of one is the source of the
1046  // other.
1047  if (M->getSource() != MDep->getDest() || MDep->isVolatile())
1048  return false;
1049 
1050  // If dep instruction is reading from our current input, then it is a noop
1051  // transfer and substituting the input won't change this instruction. Just
1052  // ignore the input and let someone else zap MDep. This handles cases like:
1053  // memcpy(a <- a)
1054  // memcpy(b <- a)
1055  if (M->getSource() == MDep->getSource())
1056  return false;
1057 
1058  // Second, the length of the memcpy's must be the same, or the preceding one
1059  // must be larger than the following one.
1060  if (MDep->getLength() != M->getLength()) {
1061  ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1062  ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
1063  if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
1064  return false;
1065  }
1066 
1067  // Verify that the copied-from memory doesn't change in between the two
1068  // transfers. For example, in:
1069  // memcpy(a <- b)
1070  // *b = 42;
1071  // memcpy(c <- a)
1072  // It would be invalid to transform the second memcpy into memcpy(c <- b).
1073  //
1074  // TODO: If the code between M and MDep is transparent to the destination "c",
1075  // then we could still perform the xform by moving M up to the first memcpy.
1076  if (EnableMemorySSA) {
1077  // TODO: It would be sufficient to check the MDep source up to the memcpy
1078  // size of M, rather than MDep.
1079  if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
1080  MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
1081  return false;
1082  } else {
1083  // NOTE: This is conservative, it will stop on any read from the source loc,
1084  // not just the defining memcpy.
1085  MemDepResult SourceDep =
1086  MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
1087  M->getIterator(), M->getParent());
1088  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
1089  return false;
1090  }
1091 
1092  // If the dest of the second might alias the source of the first, then the
1093  // source and dest might overlap. We still want to eliminate the intermediate
1094  // value, but we have to generate a memmove instead of memcpy.
1095  bool UseMemMove = false;
1096  if (!AA->isNoAlias(MemoryLocation::getForDest(M),
1097  MemoryLocation::getForSource(MDep)))
1098  UseMemMove = true;
1099 
1100  // If all checks passed, then we can transform M.
1101  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1102  << *MDep << '\n' << *M << '\n');
1103 
1104  // TODO: Is this worth it if we're creating a less aligned memcpy? For
1105  // example we could be moving from movaps -> movq on x86.
1106  IRBuilder<> Builder(M);
1107  Instruction *NewM;
1108  if (UseMemMove)
1109  NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
1110  MDep->getRawSource(), MDep->getSourceAlign(),
1111  M->getLength(), M->isVolatile());
1112  else
1113  NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
1114  MDep->getRawSource(), MDep->getSourceAlign(),
1115  M->getLength(), M->isVolatile());
1116 
1117  if (MSSAU) {
1118  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
1119  auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1120  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1121  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1122  }
1123 
1124  // Remove the instruction we're replacing.
1125  eraseInstruction(M);
1126  ++NumMemCpyInstr;
1127  return true;
1128 }
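// Illustrative effect of the memcpy-memcpy forwarding above, in the same
// pseudo-notation used by the comments in this file (names hypothetical):
//   memcpy(a <- b)
//   memcpy(c <- a)
// becomes
//   memcpy(a <- b)
//   memcpy(c <- b)      // or a memmove, if c may alias b
// leaving the intermediate copy into 'a' for DSE to remove if it becomes dead.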
1129 
1130 /// We've found that the (upward scanning) memory dependence of \p MemCpy is
1131 /// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1132 /// weren't copied over by \p MemCpy.
1133 ///
1134 /// In other words, transform:
1135 /// \code
1136 /// memset(dst, c, dst_size);
1137 /// memcpy(dst, src, src_size);
1138 /// \endcode
1139 /// into:
1140 /// \code
1141 /// memcpy(dst, src, src_size);
1142 /// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1143 /// \endcode
1144 bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1145  MemSetInst *MemSet) {
1146  // We can only transform memset/memcpy with the same destination.
1147  if (!AA->isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1148  return false;
1149 
1150  // Check that src and dst of the memcpy aren't the same. While memcpy
1151  // operands cannot partially overlap, exact equality is allowed.
1152  if (!AA->isNoAlias(MemoryLocation(MemCpy->getSource(),
1153  LocationSize::precise(1)),
1154  MemoryLocation(MemCpy->getDest(),
1155  LocationSize::precise(1))))
1156  return false;
1157 
1158  if (EnableMemorySSA) {
1159  // We know that dst up to src_size is not written. We now need to make sure
1160  // that dst up to dst_size is not accessed. (If we did not move the memset,
1161  // checking for reads would be sufficient.)
1162  if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
1163  MSSA->getMemoryAccess(MemSet),
1164  MSSA->getMemoryAccess(MemCpy))) {
1165  return false;
1166  }
1167  } else {
1168  // We have already checked that dst up to src_size is not accessed. We
1169  // need to make sure that there are no accesses up to dst_size either.
1170  MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
1171  MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(),
1172  MemCpy->getParent());
1173  if (DstDepInfo.getInst() != MemSet)
1174  return false;
1175  }
1176 
1177  // Use the same i8* dest as the memcpy, killing the memset dest if different.
1178  Value *Dest = MemCpy->getRawDest();
1179  Value *DestSize = MemSet->getLength();
1180  Value *SrcSize = MemCpy->getLength();
1181 
1182  if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1183  return false;
1184 
1185  // If the sizes are the same, simply drop the memset instead of generating
1186  // a replacement with zero size.
1187  if (DestSize == SrcSize) {
1188  eraseInstruction(MemSet);
1189  return true;
1190  }
1191 
1192  // By default, create an unaligned memset.
1193  unsigned Align = 1;
1194  // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1195  // of the sum.
1196  const unsigned DestAlign =
1197  std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment());
1198  if (DestAlign > 1)
1199  if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1200  Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
1201 
1202  IRBuilder<> Builder(MemCpy);
1203 
1204  // If the sizes have different types, zext the smaller one.
1205  if (DestSize->getType() != SrcSize->getType()) {
1206  if (DestSize->getType()->getIntegerBitWidth() >
1207  SrcSize->getType()->getIntegerBitWidth())
1208  SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1209  else
1210  DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1211  }
1212 
1213  Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1214  Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1215  Value *MemsetLen = Builder.CreateSelect(
1216  Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1217  Instruction *NewMemSet = Builder.CreateMemSet(
1218  Builder.CreateGEP(Dest->getType()->getPointerElementType(), Dest,
1219  SrcSize),
1220  MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
1221 
1222  if (MSSAU) {
1223  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
1224  "MemCpy must be a MemoryDef");
1225  // The new memset is inserted after the memcpy, but it is known that its
1226  // defining access is the memset about to be removed which immediately
1227  // precedes the memcpy.
1228  auto *LastDef =
1229  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1230  auto *NewAccess = MSSAU->createMemoryAccessBefore(
1231  NewMemSet, LastDef->getDefiningAccess(), LastDef);
1232  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1233  }
1234 
1235  eraseInstruction(MemSet);
1236  return true;
1237 }
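// The trailing memset emitted above computes its length with IR along these
// lines (a sketch; value names are hypothetical):
//   %ule  = icmp ule i64 %dst_size, %src_size
//   %diff = sub i64 %dst_size, %src_size
//   %len  = select i1 %ule, i64 0, i64 %diff
// and writes %len bytes starting at dst + src_size, matching the
// "dst_size <= src_size ? 0 : dst_size - src_size" form in the doc comment.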
1238 
1239 /// Determine whether the instruction has undefined content for the given Size,
1240 /// either because it was freshly alloca'd or started its lifetime.
1241 static bool hasUndefContents(Instruction *I, Value *Size) {
1242  if (isa<AllocaInst>(I))
1243  return true;
1244 
1245  if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) {
1246  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
1247  if (II->getIntrinsicID() == Intrinsic::lifetime_start)
1248  if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
1249  if (LTSize->getZExtValue() >= CSize->getZExtValue())
1250  return true;
1251  }
1252 
1253  return false;
1254 }
1255 
1256 static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
1257  MemoryDef *Def, Value *Size) {
1258  if (MSSA->isLiveOnEntryDef(Def))
1259  return isa<AllocaInst>(getUnderlyingObject(V));
1260 
1261  if (IntrinsicInst *II =
1262  dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1263  if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1264  ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1265 
1266  if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) {
1267  if (AA->isMustAlias(V, II->getArgOperand(1)) &&
1268  LTSize->getZExtValue() >= CSize->getZExtValue())
1269  return true;
1270  }
1271 
1272  // If the lifetime.start covers a whole alloca (as it almost always
1273  // does) and we're querying a pointer based on that alloca, then we know
1274  // the memory is definitely undef, regardless of how exactly we alias.
1275  // The size also doesn't matter, as an out-of-bounds access would be UB.
1276  AllocaInst *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V));
1277  if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1278  DataLayout DL = Alloca->getModule()->getDataLayout();
1279  if (Optional<TypeSize> AllocaSize = Alloca->getAllocationSizeInBits(DL))
1280  if (*AllocaSize == LTSize->getValue() * 8)
1281  return true;
1282  }
1283  }
1284  }
1285 
1286  return false;
1287 }
1288 
1289 /// Transform memcpy to memset when its source was just memset.
1290 /// In other words, turn:
1291 /// \code
1292 /// memset(dst1, c, dst1_size);
1293 /// memcpy(dst2, dst1, dst2_size);
1294 /// \endcode
1295 /// into:
1296 /// \code
1297 /// memset(dst1, c, dst1_size);
1298 /// memset(dst2, c, dst2_size);
1299 /// \endcode
1300 /// When dst2_size <= dst1_size.
1301 bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1302  MemSetInst *MemSet) {
1303  // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
1304  // memcpying from the same address. Otherwise it is hard to reason about.
1305  if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1306  return false;
1307 
1308  Value *MemSetSize = MemSet->getLength();
1309  Value *CopySize = MemCpy->getLength();
1310 
1311  if (MemSetSize != CopySize) {
1312  // Make sure the memcpy doesn't read any more than what the memset wrote.
1313  // Don't worry about sizes larger than i64.
1314 
1315  // A known memset size is required.
1316  ConstantInt *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1317  if (!CMemSetSize)
1318  return false;
1319 
1320  // A known memcpy size is also required.
1321  ConstantInt *CCopySize = dyn_cast<ConstantInt>(CopySize);
1322  if (!CCopySize)
1323  return false;
1324  if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1325  // If the memcpy is larger than the memset, but the memory was undef prior
1326  // to the memset, we can just ignore the tail. Technically we're only
1327  // interested in the bytes from MemSetSize..CopySize here, but as we can't
1328  // easily represent this location, we use the full 0..CopySize range.
1329  MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1330  bool CanReduceSize = false;
1331  if (EnableMemorySSA) {
1332  MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1333  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1334  MemSetAccess->getDefiningAccess(), MemCpyLoc);
1335  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1336  if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize))
1337  CanReduceSize = true;
1338  } else {
1339  MemDepResult DepInfo = MD->getPointerDependencyFrom(
1340  MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
1341  if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
1342  CanReduceSize = true;
1343  }
1344 
1345  if (!CanReduceSize)
1346  return false;
1347  CopySize = MemSetSize;
1348  }
1349  }
1350 
1351  IRBuilder<> Builder(MemCpy);
1352  Instruction *NewM =
1353  Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1354  CopySize, MaybeAlign(MemCpy->getDestAlignment()));
1355  if (MSSAU) {
1356  auto *LastDef =
1357  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1358  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1359  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1360  }
1361 
1362  return true;
1363 }
1364 
1365 /// Perform simplification of memcpy's. If we have memcpy A
1366 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1367 /// B to be a memcpy from X to Z (or potentially a memmove, depending on
1368 /// circumstances). This allows later passes to remove the first memcpy
1369 /// altogether.
1370 bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1371  // We can only optimize non-volatile memcpy's.
1372  if (M->isVolatile()) return false;
1373 
1374  // If the source and destination of the memcpy are the same, then zap it.
1375  if (M->getSource() == M->getDest()) {
1376  ++BBI;
1377  eraseInstruction(M);
1378  return true;
1379  }
1380 
1381  // If copying from a constant, try to turn the memcpy into a memset.
1382  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
1383  if (GV->isConstant() && GV->hasDefinitiveInitializer())
1384  if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1385  M->getModule()->getDataLayout())) {
1386  IRBuilder<> Builder(M);
1387  Instruction *NewM =
1388  Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
1389  MaybeAlign(M->getDestAlignment()), false);
1390  if (MSSAU) {
1391  auto *LastDef =
1392  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1393  auto *NewAccess =
1394  MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1395  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1396  }
1397 
1398  eraseInstruction(M);
1399  ++NumCpyToSet;
1400  return true;
1401  }
1402 
1403  if (EnableMemorySSA) {
1404  MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1405  MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
1406  MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1407  const MemoryAccess *DestClobber =
1408  MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
1409 
1410  // Try to turn a partially redundant memset + memcpy into
1411  // memcpy + smaller memset. We don't need the memcpy size for this.
1412  // The memcpy must post-dom the memset, so limit this to the same basic
1413  // block. A non-local generalization is likely not worthwhile.
1414  if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1415  if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1416  if (DestClobber->getBlock() == M->getParent())
1417  if (processMemSetMemCpyDependence(M, MDep))
1418  return true;
1419 
1420  MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1421  AnyClobber, MemoryLocation::getForSource(M));
1422 
1423  // There are four possible optimizations we can do for memcpy:
1424  // a) memcpy-memcpy xform which exposes redundance for DSE.
1425  // b) call-memcpy xform for return slot optimization.
1426  // c) memcpy from freshly alloca'd space or space that has just started
1427  // its lifetime copies undefined data, and we can therefore eliminate
1428  // the memcpy in favor of the data that was already at the destination.
1429  // d) memcpy from a just-memset'd source can be turned into memset.
1430  if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1431  if (Instruction *MI = MD->getMemoryInst()) {
1432  if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1433  if (auto *C = dyn_cast<CallInst>(MI)) {
1434  // The memcpy must post-dom the call. Limit to the same block for
1435  // now. Additionally, we need to ensure that there are no accesses
1436  // to dest between the call and the memcpy. Accesses to src will be
1437  // checked by performCallSlotOptzn().
1438  // TODO: Support non-local call-slot optimization?
1439  if (C->getParent() == M->getParent() &&
1440  !accessedBetween(*AA, DestLoc, MD, MA)) {
1441  // FIXME: Can we pass in either of dest/src alignment here instead
1442  // of conservatively taking the minimum?
1443  Align Alignment = std::min(M->getDestAlign().valueOrOne(),
1444  M->getSourceAlign().valueOrOne());
1445  if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1446  CopySize->getZExtValue(), Alignment,
1447  C)) {
1448  LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1449  << " call: " << *C << "\n"
1450  << " memcpy: " << *M << "\n");
1451  eraseInstruction(M);
1452  ++NumMemCpyInstr;
1453  return true;
1454  }
1455  }
1456  }
1457  }
1458  if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1459  return processMemCpyMemCpyDependence(M, MDep);
1460  if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1461  if (performMemCpyToMemSetOptzn(M, MDep)) {
1462  LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1463  eraseInstruction(M);
1464  ++NumCpyToSet;
1465  return true;
1466  }
1467  }
1468  }
1469 
1470  if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, M->getLength())) {
1471  LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1472  eraseInstruction(M);
1473  ++NumMemCpyInstr;
1474  return true;
1475  }
1476  }
1477  } else {
1478  MemDepResult DepInfo = MD->getDependency(M);
1479 
1480  // Try to turn a partially redundant memset + memcpy into
1481  // memcpy + smaller memset. We don't need the memcpy size for this.
1482  if (DepInfo.isClobber())
1483  if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
1484  if (processMemSetMemCpyDependence(M, MDep))
1485  return true;
1486 
1487  // There are four possible optimizations we can do for memcpy:
1488  // a) memcpy-memcpy xform which exposes redundance for DSE.
1489  // b) call-memcpy xform for return slot optimization.
1490  // c) memcpy from freshly alloca'd space or space that has just started
1491  // its lifetime copies undefined data, and we can therefore eliminate
1492  // the memcpy in favor of the data that was already at the destination.
1493  // d) memcpy from a just-memset'd source can be turned into memset.
1494  if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1495  if (DepInfo.isClobber()) {
1496  if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
1497  // FIXME: Can we pass in either of dest/src alignment here instead
1498  // of conservatively taking the minimum?
1499  Align Alignment = std::min(M->getDestAlign().valueOrOne(),
1500  M->getSourceAlign().valueOrOne());
1501  if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1502  CopySize->getZExtValue(), Alignment, C)) {
1503  eraseInstruction(M);
1504  ++NumMemCpyInstr;
1505  return true;
1506  }
1507  }
1508  }
1509  }
1510 
1511  MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
1512  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
1513  SrcLoc, true, M->getIterator(), M->getParent());
1514 
1515  if (SrcDepInfo.isClobber()) {
1516  if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
1517  return processMemCpyMemCpyDependence(M, MDep);
1518  } else if (SrcDepInfo.isDef()) {
1519  if (hasUndefContents(SrcDepInfo.getInst(), M->getLength())) {
1520  eraseInstruction(M);
1521  ++NumMemCpyInstr;
1522  return true;
1523  }
1524  }
1525 
1526  if (SrcDepInfo.isClobber())
1527  if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
1528  if (performMemCpyToMemSetOptzn(M, MDep)) {
1529  eraseInstruction(M);
1530  ++NumCpyToSet;
1531  return true;
1532  }
1533  }
1534 
1535  return false;
1536 }
1537 
1538 /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1539 /// not to alias.
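/// A hedged sketch (operands are illustrative):
///   call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i1 false)
/// becomes, once alias analysis proves %dst and %src do not alias,
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i1 false)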
1540 bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1541  if (!TLI->has(LibFunc_memmove))
1542  return false;
1543 
1544  // See if the pointers alias.
1545  if (!AA->isNoAlias(MemoryLocation::getForDest(M),
1546  MemoryLocation::getForSource(M)))
1547  return false;
1548 
1549  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1550  << "\n");
1551 
1552  // If not, then we know we can transform this.
1553  Type *ArgTys[3] = { M->getRawDest()->getType(),
1554  M->getRawSource()->getType(),
1555  M->getLength()->getType() };
1556  M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
1557  Intrinsic::memcpy, ArgTys));
1558 
1559  // For MemorySSA nothing really changes (except that memcpy may imply stricter
1560  // aliasing guarantees).
1561 
1562  // MemDep may have overly conservative information about this instruction;
1563  // just conservatively flush it from the cache.
1564  if (MD)
1565  MD->removeInstruction(M);
1566 
1567  ++NumMoveToCpy;
1568  return true;
1569 }
1570 
1571 /// This is called on every byval argument in call sites.
1572 bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1573  const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1574  // Find out what feeds this byval argument.
1575  Value *ByValArg = CB.getArgOperand(ArgNo);
1576  Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
1577  uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
1578  MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1579  MemCpyInst *MDep = nullptr;
1580  if (EnableMemorySSA) {
1581  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1582  if (!CallAccess)
1583  return false;
1584  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1585  CallAccess->getDefiningAccess(), Loc);
1586  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1587  MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1588  } else {
1589  MemDepResult DepInfo = MD->getPointerDependencyFrom(
1590  Loc, true, CB.getIterator(), CB.getParent());
1591  if (!DepInfo.isClobber())
1592  return false;
1593  MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
1594  }
1595 
1596  // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1597  // a memcpy, see if we can byval from the source of the memcpy instead of the
1598  // result.
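  // A hedged C-level sketch of the pattern being matched (names are
  // illustrative, not from this file):
  //   struct S tmp = *src;   // lowered to a memcpy into a local temporary
  //   foo(tmp);              // 'tmp' is passed byval
  // If *src is not written between the memcpy and the call, the call can take
  // 'src' byval directly and the temporary becomes dead.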
1599  if (!MDep || MDep->isVolatile() ||
1600  ByValArg->stripPointerCasts() != MDep->getDest())
1601  return false;
1602 
1603  // The length of the memcpy must be larger than or equal to the size of the byval.
1604  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1605  if (!C1 || C1->getValue().getZExtValue() < ByValSize)
1606  return false;
1607 
1608  // Get the alignment of the byval. If the call doesn't specify the alignment,
1609  // then it is some target-specific value that we can't know.
1610  MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1611  if (!ByValAlign) return false;
1612 
1613  // If it is greater than the memcpy's source alignment, check whether we can
1614  // force the source of the memcpy to the alignment we need. If we fail, bail out.
1615  MaybeAlign MemDepAlign = MDep->getSourceAlign();
1616  if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1617  getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1618  DT) < *ByValAlign)
1619  return false;
1620 
1621  // The address space of the memcpy source must match the byval argument
1622  if (MDep->getSource()->getType()->getPointerAddressSpace() !=
1623  ByValArg->getType()->getPointerAddressSpace())
1624  return false;
1625 
1626  // Verify that the copied-from memory doesn't change in between the memcpy and
1627  // the byval call.
1628  // memcpy(a <- b)
1629  // *b = 42;
1630  // foo(*a)
1631  // It would be invalid to transform the call into foo(*b).
1632  if (EnableMemorySSA) {
1633  if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
1634  MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
1635  return false;
1636  } else {
1637  // NOTE: This is conservative; it will stop on any read from the source loc,
1638  // not just the defining memcpy.
1639  MemDepResult SourceDep = MD->getPointerDependencyFrom(
1640  MemoryLocation::getForSource(MDep), false,
1641  CB.getIterator(), MDep->getParent());
1642  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
1643  return false;
1644  }
1645 
1646  Value *TmpCast = MDep->getSource();
1647  if (MDep->getSource()->getType() != ByValArg->getType()) {
1648  BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
1649  "tmpcast", &CB);
1650  // Set the tmpcast's DebugLoc to MDep's
1651  TmpBitCast->setDebugLoc(MDep->getDebugLoc());
1652  TmpCast = TmpBitCast;
1653  }
1654 
1655  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1656  << " " << *MDep << "\n"
1657  << " " << CB << "\n");
1658 
1659  // Otherwise we're good! Update the byval argument.
1660  CB.setArgOperand(ArgNo, TmpCast);
1661  ++NumMemCpyInstr;
1662  return true;
1663 }
1664 
1665 /// Executes one iteration of MemCpyOptPass.
1666 bool MemCpyOptPass::iterateOnFunction(Function &F) {
1667  bool MadeChange = false;
1668 
1669  // Walk all instructions in the function.
1670  for (BasicBlock &BB : F) {
1671  // Skip unreachable blocks. For example, processStore assumes that an
1672  // instruction in a BB can't be dominated by a later instruction in the
1673  // same BB (a scenario that can happen for an unreachable BB that has
1674  // itself as a predecessor).
1675  if (!DT->isReachableFromEntry(&BB))
1676  continue;
1677 
1678  for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
1679  // Avoid invalidating the iterator.
1680  Instruction *I = &*BI++;
1681 
1682  bool RepeatInstruction = false;
1683 
1684  if (StoreInst *SI = dyn_cast<StoreInst>(I))
1685  MadeChange |= processStore(SI, BI);
1686  else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
1687  RepeatInstruction = processMemSet(M, BI);
1688  else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
1689  RepeatInstruction = processMemCpy(M, BI);
1690  else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
1691  RepeatInstruction = processMemMove(M);
1692  else if (auto *CB = dyn_cast<CallBase>(I)) {
1693  for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
1694  if (CB->isByValArgument(i))
1695  MadeChange |= processByValArgument(*CB, i);
1696  }
1697 
1698  // Reprocess the instruction if desired.
1699  if (RepeatInstruction) {
1700  if (BI != BB.begin())
1701  --BI;
1702  MadeChange = true;
1703  }
1704  }
1705  }
1706 
1707  return MadeChange;
1708 }
1709 
1710 PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
1711  auto *MD = !EnableMemorySSA ? &AM.getResult<MemoryDependenceAnalysis>(F)
1712  : AM.getCachedResult<MemoryDependenceAnalysis>(F);
1713  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1714  auto *AA = &AM.getResult<AAManager>(F);
1715  auto *AC = &AM.getResult<AssumptionAnalysis>(F);
1716  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1717  auto *MSSA = EnableMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F)
1718  : AM.getCachedResult<MemorySSAAnalysis>(F);
1719 
1720  bool MadeChange =
1721  runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr);
1722  if (!MadeChange)
1723  return PreservedAnalyses::all();
1724 
1725  PreservedAnalyses PA;
1726  PA.preserveSet<CFGAnalyses>();
1727  PA.preserve<GlobalsAA>();
1728  if (MD)
1729  PA.preserve<MemoryDependenceAnalysis>();
1730  if (MSSA)
1731  PA.preserve<MemorySSAAnalysis>();
1732  return PA;
1733 }
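// Hedged usage note: under the new pass manager this pass is typically run as
//   opt -passes=memcpyopt input.ll -S
// (pass name assumed to be "memcpyopt"; exact opt flags may vary between
// releases).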
1734 
1735 bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_,
1736  TargetLibraryInfo *TLI_, AliasAnalysis *AA_,
1737  AssumptionCache *AC_, DominatorTree *DT_,
1738  MemorySSA *MSSA_) {
1739  bool MadeChange = false;
1740  MD = MD_;
1741  TLI = TLI_;
1742  AA = AA_;
1743  AC = AC_;
1744  DT = DT_;
1745  MSSA = MSSA_;
1746  MemorySSAUpdater MSSAU_(MSSA_);
1747  MSSAU = MSSA_ ? &MSSAU_ : nullptr;
1748  // If we don't have at least memset and memcpy, there is little point in doing
1749  // anything here. These are required by a freestanding implementation, so if
1750  // even they are disabled, there is no point in trying hard.
1751  if (!TLI->has(LibFunc_memset) || !TLI->has(LibFunc_memcpy))
1752  return false;
1753 
1754  while (true) {
1755  if (!iterateOnFunction(F))
1756  break;
1757  MadeChange = true;
1758  }
1759 
1760  if (MSSA_ && VerifyMemorySSA)
1761  MSSA_->verifyMemorySSA();
1762 
1763  MD = nullptr;
1764  return MadeChange;
1765 }
1766 
1767 /// This is the main transformation entry point for a function.
1768 bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
1769  if (skipFunction(F))
1770  return false;
1771 
1772  auto *MDWP = !EnableMemorySSA
1773  ? &getAnalysis<MemoryDependenceWrapperPass>()
1774  : getAnalysisIfAvailable<MemoryDependenceWrapperPass>();
1775  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1776  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1777  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1778  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1779  auto *MSSAWP = EnableMemorySSA
1780  ? &getAnalysis<MemorySSAWrapperPass>()
1781  : getAnalysisIfAvailable<MemorySSAWrapperPass>();
1782 
1783  return Impl.runImpl(F, MDWP ? & MDWP->getMemDep() : nullptr, TLI, AA, AC, DT,
1784  MSSAWP ? &MSSAWP->getMSSA() : nullptr);
1785 }