InstCombineLoadStoreAlloca.cpp
1 //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the visit functions for load, store and alloca.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/Loads.h"
19 #include "llvm/Transforms/Utils/Local.h"
20 #include "llvm/IR/ConstantRange.h"
21 #include "llvm/IR/DataLayout.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/LLVMContext.h"
24 #include "llvm/IR/MDBuilder.h"
25 #include "llvm/IR/PatternMatch.h"
26 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
27 using namespace llvm;
28 using namespace PatternMatch;
29 
30 #define DEBUG_TYPE "instcombine"
31 
32 STATISTIC(NumDeadStore, "Number of dead stores eliminated");
33 STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
34 
35 /// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
36 /// some part of a constant global variable. This intentionally only accepts
37 /// constant expressions because we can't rewrite arbitrary instructions.
38 static bool pointsToConstantGlobal(Value *V) {
39  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
40  return GV->isConstant();
41 
42  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
43  if (CE->getOpcode() == Instruction::BitCast ||
44  CE->getOpcode() == Instruction::AddrSpaceCast ||
45  CE->getOpcode() == Instruction::GetElementPtr)
46  return pointsToConstantGlobal(CE->getOperand(0));
47  }
48  return false;
49 }
50 
51 /// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
52 /// pointer to an alloca. Ignore any reads of the pointer; return false if we
53 /// see any stores or other unknown uses. If we see pointer arithmetic, keep
54 /// track of whether it moves the pointer (with IsOffset) but otherwise traverse
55 /// the uses. If we see a memcpy/memmove that targets an unoffset pointer to
56 /// the alloca, and if the source pointer is a pointer to a constant global, we
57 /// can optimize this.
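///
/// For example (illustrative; names and types are arbitrary), this is the
/// pattern being looked for:
///   @g = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
///   ...
///   %a = alloca [4 x i32]
///   %p = bitcast [4 x i32]* %a to i8*
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* bitcast ([4 x i32]* @g to i8*), i64 16, i1 false)
///   ; ... only reads of %a follow ...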
58 static bool
59 isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
60                                SmallVectorImpl<Instruction *> &ToDelete) {
61  // We track lifetime intrinsics as we encounter them. If we decide to go
62  // ahead and replace the value with the global, this lets the caller quickly
63  // eliminate the markers.
64 
65  SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
66  ValuesToInspect.emplace_back(V, false);
67  while (!ValuesToInspect.empty()) {
68  auto ValuePair = ValuesToInspect.pop_back_val();
69  const bool IsOffset = ValuePair.second;
70  for (auto &U : ValuePair.first->uses()) {
71  auto *I = cast<Instruction>(U.getUser());
72 
73  if (auto *LI = dyn_cast<LoadInst>(I)) {
74  // Ignore non-volatile loads, they are always ok.
75  if (!LI->isSimple()) return false;
76  continue;
77  }
78 
79  if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
80  // If uses of the bitcast are ok, we are ok.
81  ValuesToInspect.emplace_back(I, IsOffset);
82  continue;
83  }
84  if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
85  // If the GEP has all zero indices, it doesn't offset the pointer; if any
86  // index is non-zero, it does.
87  ValuesToInspect.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
88  continue;
89  }
90 
91  if (auto CS = CallSite(I)) {
92  // If this is the function being called then we treat it like a load and
93  // ignore it.
94  if (CS.isCallee(&U))
95  continue;
96 
97  unsigned DataOpNo = CS.getDataOperandNo(&U);
98  bool IsArgOperand = CS.isArgOperand(&U);
99 
100  // Inalloca arguments are clobbered by the call.
101  if (IsArgOperand && CS.isInAllocaArgument(DataOpNo))
102  return false;
103 
104  // If this is a readonly/readnone call site, then we know it is just a
105  // load (but one that potentially returns the value itself), so we can
106  // ignore it if we know that the value isn't captured.
107  if (CS.onlyReadsMemory() &&
108  (CS.getInstruction()->use_empty() || CS.doesNotCapture(DataOpNo)))
109  continue;
110 
111  // If this is being passed as a byval argument, the caller is making a
112  // copy, so it is only a read of the alloca.
113  if (IsArgOperand && CS.isByValArgument(DataOpNo))
114  continue;
115  }
116 
117  // Lifetime intrinsics can be handled by the caller.
118  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
119  if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
120  II->getIntrinsicID() == Intrinsic::lifetime_end) {
121  assert(II->use_empty() && "Lifetime markers have no result to use!");
122  ToDelete.push_back(II);
123  continue;
124  }
125  }
126 
127  // If this isn't our memcpy/memmove, reject it as something we can't
128  // handle.
129  MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
130  if (!MI)
131  return false;
132 
133  // If the transfer is using the alloca as a source of the transfer, then
134  // ignore it since it is a load (unless the transfer is volatile).
135  if (U.getOperandNo() == 1) {
136  if (MI->isVolatile()) return false;
137  continue;
138  }
139 
140  // If we already have seen a copy, reject the second one.
141  if (TheCopy) return false;
142 
143  // If the pointer has been offset from the start of the alloca, we can't
144  // safely handle this.
145  if (IsOffset) return false;
146 
147  // If the memintrinsic isn't using the alloca as the dest, reject it.
148  if (U.getOperandNo() != 0) return false;
149 
150  // If the source of the memcpy/move is not a constant global, reject it.
151  if (!pointsToConstantGlobal(MI->getSource()))
152  return false;
153 
154  // Otherwise, the transform is safe. Remember the copy instruction.
155  TheCopy = MI;
156  }
157  }
158  return true;
159 }
160 
161 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
162 /// modified by a copy from a constant global. If we can prove this, we can
163 /// replace any uses of the alloca with uses of the global directly.
164 static MemTransferInst *
165 isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
166                                SmallVectorImpl<Instruction *> &ToDelete) {
167  MemTransferInst *TheCopy = nullptr;
168  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
169  return TheCopy;
170  return nullptr;
171 }
172 
173 /// Returns true if V is dereferenceable for the size of the alloca.
174 static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
175  const DataLayout &DL) {
176  if (AI->isArrayAllocation())
177  return false;
178  uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
179  if (!AllocaSize)
180  return false;
181  return isDereferenceableAndAlignedPointer(V, AI->getAlignment(),
182                                            APInt(64, AllocaSize), DL);
183 }
184 
185 static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
186  // Check for array size of 1 (scalar allocation).
187  if (!AI.isArrayAllocation()) {
188  // i32 1 is the canonical array size for scalar allocations.
189  if (AI.getArraySize()->getType()->isIntegerTy(32))
190  return nullptr;
191 
192  // Canonicalize it.
193  Value *V = IC.Builder.getInt32(1);
194  AI.setOperand(0, V);
195  return &AI;
196  }
197 
198  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
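  // For example (illustrative; the index type depends on the target's pointer
  // size), "%a = alloca i32, i32 4" becomes roughly
  //   %a1 = alloca [4 x i32]
  //   %a1.sub = getelementptr inbounds [4 x i32], [4 x i32]* %a1, i64 0, i64 0
  // and all users of the original alloca are rewritten to use the ".sub" GEP.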
199  if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
200  if (C->getValue().getActiveBits() <= 64) {
201  Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
202  AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
203  New->setAlignment(AI.getAlignment());
204 
205      // Scan to the end of the allocation instructions, to skip over a block of
206      // allocas if possible. Also skip interleaved debug info.
207  //
208  BasicBlock::iterator It(New);
209  while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
210  ++It;
211 
212  // Now that It is pointing to the first non-allocation-inst in the block,
213  // insert our getelementptr instruction...
214  //
215  Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
216  Value *NullIdx = Constant::getNullValue(IdxTy);
217  Value *Idx[2] = {NullIdx, NullIdx};
218  Instruction *GEP =
219  GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
220  IC.InsertNewInstBefore(GEP, *It);
221 
222  // Now make everything use the getelementptr instead of the original
223  // allocation.
224  return IC.replaceInstUsesWith(AI, GEP);
225  }
226  }
227 
228  if (isa<UndefValue>(AI.getArraySize()))
229    return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
230 
231  // Ensure that the alloca array size argument has type intptr_t, so that
232  // any casting is exposed early.
233  Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
234  if (AI.getArraySize()->getType() != IntPtrTy) {
235  Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false);
236  AI.setOperand(0, V);
237  return &AI;
238  }
239 
240  return nullptr;
241 }
242 
243 namespace {
244 // If I and V are pointers in different address spaces, it is not allowed to
245 // use replaceAllUsesWith since I and V have different types. A
246 // non-target-specific transformation should not use addrspacecast on V since
247 // the two address spaces may be disjoint depending on the target.
248 //
249 // This class chases down uses of the old pointer until reaching the load
250 // instructions, then replaces the old pointer in the load instructions with
251 // the new pointer. If during the chasing it sees bitcast or GEP, it will
252 // create new bitcast or GEP with the new pointer and use them in the load
253 // instruction.
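//
// For example (illustrative), when the replacement pointer lives in a
// different address space, a use chain such as
//   %b = bitcast i8* %old to i32*
//   %v = load i32, i32* %b
// is handled by creating a matching bitcast of the new pointer and cloning
// only the load, instead of calling replaceAllUsesWith on %old.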
254 class PointerReplacer {
255 public:
256  PointerReplacer(InstCombiner &IC) : IC(IC) {}
257  void replacePointer(Instruction &I, Value *V);
258 
259 private:
260  void findLoadAndReplace(Instruction &I);
261  void replace(Instruction *I);
262  Value *getReplacement(Value *I);
263 
264  SmallVector<Instruction *, 4> Path;
265  MapVector<Value *, Value *> WorkMap;
266  InstCombiner &IC;
267 };
268 } // end anonymous namespace
269 
270 void PointerReplacer::findLoadAndReplace(Instruction &I) {
271  for (auto U : I.users()) {
272  auto *Inst = dyn_cast<Instruction>(&*U);
273  if (!Inst)
274  return;
275  LLVM_DEBUG(dbgs() << "Found pointer user: " << *U << '\n');
276  if (isa<LoadInst>(Inst)) {
277  for (auto P : Path)
278  replace(P);
279  replace(Inst);
280  } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
281  Path.push_back(Inst);
282  findLoadAndReplace(*Inst);
283  Path.pop_back();
284  } else {
285  return;
286  }
287  }
288 }
289 
290 Value *PointerReplacer::getReplacement(Value *V) {
291  auto Loc = WorkMap.find(V);
292  if (Loc != WorkMap.end())
293  return Loc->second;
294  return nullptr;
295 }
296 
297 void PointerReplacer::replace(Instruction *I) {
298  if (getReplacement(I))
299  return;
300 
301  if (auto *LT = dyn_cast<LoadInst>(I)) {
302  auto *V = getReplacement(LT->getPointerOperand());
303  assert(V && "Operand not replaced");
304  auto *NewI = new LoadInst(V);
305  NewI->takeName(LT);
306  IC.InsertNewInstWith(NewI, *LT);
307  IC.replaceInstUsesWith(*LT, NewI);
308  WorkMap[LT] = NewI;
309  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
310  auto *V = getReplacement(GEP->getPointerOperand());
311  assert(V && "Operand not replaced");
312  SmallVector<Value *, 8> Indices;
313  Indices.append(GEP->idx_begin(), GEP->idx_end());
314  auto *NewI = GetElementPtrInst::Create(
315  V->getType()->getPointerElementType(), V, Indices);
316  IC.InsertNewInstWith(NewI, *GEP);
317  NewI->takeName(GEP);
318  WorkMap[GEP] = NewI;
319  } else if (auto *BC = dyn_cast<BitCastInst>(I)) {
320  auto *V = getReplacement(BC->getOperand(0));
321  assert(V && "Operand not replaced");
322  auto *NewT = PointerType::get(BC->getType()->getPointerElementType(),
323                                V->getType()->getPointerAddressSpace());
324  auto *NewI = new BitCastInst(V, NewT);
325  IC.InsertNewInstWith(NewI, *BC);
326  NewI->takeName(BC);
327  WorkMap[BC] = NewI;
328  } else {
329  llvm_unreachable("should never reach here");
330  }
331 }
332 
333 void PointerReplacer::replacePointer(Instruction &I, Value *V) {
334 #ifndef NDEBUG
335  auto *PT = cast<PointerType>(I.getType());
336  auto *NT = cast<PointerType>(V->getType());
337  assert(PT != NT && PT->getElementType() == NT->getElementType() &&
338  "Invalid usage");
339 #endif
340  WorkMap[&I] = V;
341  findLoadAndReplace(I);
342 }
343 
344 Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
345  if (auto *I = simplifyAllocaArraySize(*this, AI))
346  return I;
347 
348  if (AI.getAllocatedType()->isSized()) {
349  // If the alignment is 0 (unspecified), assign it the preferred alignment.
350  if (AI.getAlignment() == 0)
351  AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
352 
353  // Move all allocas of zero byte objects to the entry block and merge them
354  // together. Note that we only do this for allocas, because malloc should
355  // allocate and return a unique pointer, even for a zero byte allocation.
356  if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
357  // For a zero sized alloca there is no point in doing an array allocation.
358  // This is helpful if the array size is a complicated expression not used
359  // elsewhere.
360  if (AI.isArrayAllocation()) {
361  AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
362  return &AI;
363  }
364 
365  // Get the first instruction in the entry block.
366  BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
367  Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
368  if (FirstInst != &AI) {
369  // If the entry block doesn't start with a zero-size alloca then move
370  // this one to the start of the entry block. There is no problem with
371  // dominance as the array size was forced to a constant earlier already.
372  AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
373  if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
374  DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
375  AI.moveBefore(FirstInst);
376  return &AI;
377  }
378 
379  // If the alignment of the entry block alloca is 0 (unspecified),
380  // assign it the preferred alignment.
381  if (EntryAI->getAlignment() == 0)
382  EntryAI->setAlignment(
383  DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
384  // Replace this zero-sized alloca with the one at the start of the entry
385  // block after ensuring that the address will be aligned enough for both
386  // types.
387  unsigned MaxAlign = std::max(EntryAI->getAlignment(),
388  AI.getAlignment());
389  EntryAI->setAlignment(MaxAlign);
390  if (AI.getType() != EntryAI->getType())
391  return new BitCastInst(EntryAI, AI.getType());
392  return replaceInstUsesWith(AI, EntryAI);
393  }
394  }
395  }
396 
397  if (AI.getAlignment()) {
398  // Check to see if this allocation is only modified by a memcpy/memmove from
399  // a constant global whose alignment is equal to or exceeds that of the
400  // allocation. If this is the case, we can change all users to use
401  // the constant global instead. This is commonly produced by the CFE by
402  // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
403  // is only subsequently read.
404  SmallVector<Instruction *, 4> ToDelete;
405  if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
406  unsigned SourceAlign = getOrEnforceKnownAlignment(
407  Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT);
408  if (AI.getAlignment() <= SourceAlign &&
409  isDereferenceableForAllocaSize(Copy->getSource(), &AI, DL)) {
410  LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
411  LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
412  for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
413  eraseInstFromFunction(*ToDelete[i]);
414  Constant *TheSrc = cast<Constant>(Copy->getSource());
415  auto *SrcTy = TheSrc->getType();
416  auto *DestTy = PointerType::get(AI.getType()->getPointerElementType(),
417  SrcTy->getPointerAddressSpace());
418  Constant *Cast =
419      ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, DestTy);
420  if (AI.getType()->getPointerAddressSpace() ==
421  SrcTy->getPointerAddressSpace()) {
422  Instruction *NewI = replaceInstUsesWith(AI, Cast);
423  eraseInstFromFunction(*Copy);
424  ++NumGlobalCopies;
425  return NewI;
426  } else {
427  PointerReplacer PtrReplacer(*this);
428  PtrReplacer.replacePointer(AI, Cast);
429  ++NumGlobalCopies;
430  }
431  }
432  }
433  }
434 
435  // At last, use the generic allocation site handler to aggressively remove
436  // unused allocas.
437  return visitAllocSite(AI);
438 }
439 
440 // Are we allowed to form an atomic load or store of this type?
441 static bool isSupportedAtomicType(Type *Ty) {
442  return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
443 }
444 
445 /// Helper to combine a load to a new type.
446 ///
447 /// This just does the work of combining a load to a new type. It handles
448 /// metadata, etc., and returns the new instruction. The \c NewTy should be the
449 /// loaded *value* type. This will convert it to a pointer, cast the operand to
450 /// that pointer type, load it, etc.
451 ///
452 /// Note that this will create all of the instructions with whatever insert
453 /// point the \c InstCombiner currently is using.
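///
/// For example (illustrative), retyping "%v = load i32, i32* %p" to load a
/// float instead produces roughly
///   %p.cast = bitcast i32* %p to float*
///   %v = load float, float* %p.cast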
454 static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
455                                       const Twine &Suffix = "") {
456  assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
457  "can't fold an atomic load to requested type");
458 
459  Value *Ptr = LI.getPointerOperand();
460  unsigned AS = LI.getPointerAddressSpace();
461  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
462  LI.getAllMetadata(MD);
463 
464  Value *NewPtr = nullptr;
465  if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) &&
466  NewPtr->getType()->getPointerElementType() == NewTy &&
467  NewPtr->getType()->getPointerAddressSpace() == AS))
468  NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
469 
470  LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
471  NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
472  NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
473  MDBuilder MDB(NewLoad->getContext());
474  for (const auto &MDPair : MD) {
475  unsigned ID = MDPair.first;
476  MDNode *N = MDPair.second;
477  // Note, essentially every kind of metadata should be preserved here! This
478  // routine is supposed to clone a load instruction changing *only its type*.
479  // The only metadata it makes sense to drop is metadata which is invalidated
480  // when the pointer type changes. This should essentially never be the case
481  // in LLVM, but we explicitly switch over only known metadata to be
482  // conservatively correct. If you are adding metadata to LLVM which pertains
483  // to loads, you almost certainly want to add it here.
484  switch (ID) {
485  case LLVMContext::MD_dbg:
486  case LLVMContext::MD_tbaa:
487  case LLVMContext::MD_prof:
488  case LLVMContext::MD_fpmath:
489  case LLVMContext::MD_tbaa_struct:
490  case LLVMContext::MD_invariant_load:
491  case LLVMContext::MD_alias_scope:
492  case LLVMContext::MD_noalias:
493  case LLVMContext::MD_nontemporal:
494  case LLVMContext::MD_mem_parallel_loop_access:
495  // All of these directly apply.
496  NewLoad->setMetadata(ID, N);
497  break;
498 
499  case LLVMContext::MD_nonnull:
500  copyNonnullMetadata(LI, N, *NewLoad);
501  break;
502  case LLVMContext::MD_align:
503  case LLVMContext::MD_dereferenceable:
504  case LLVMContext::MD_dereferenceable_or_null:
505  // These only directly apply if the new type is also a pointer.
506  if (NewTy->isPointerTy())
507  NewLoad->setMetadata(ID, N);
508  break;
509  case LLVMContext::MD_range:
510  copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
511  break;
512  }
513  }
514  return NewLoad;
515 }
516 
517 /// Combine a store to a new type.
518 ///
519 /// Returns the newly created store instruction.
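///
/// For example (illustrative), rewriting "store i32 %b, i32* %p" to store a
/// float value %f instead produces roughly
///   %p.cast = bitcast i32* %p to float*
///   store float %f, float* %p.cast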
520 static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
521  assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
522  "can't fold an atomic store of requested type");
523 
524  Value *Ptr = SI.getPointerOperand();
525  unsigned AS = SI.getPointerAddressSpace();
526  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
527  SI.getAllMetadata(MD);
528 
529  StoreInst *NewStore = IC.Builder.CreateAlignedStore(
530  V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
531  SI.getAlignment(), SI.isVolatile());
532  NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
533  for (const auto &MDPair : MD) {
534  unsigned ID = MDPair.first;
535  MDNode *N = MDPair.second;
536  // Note, essentially every kind of metadata should be preserved here! This
537  // routine is supposed to clone a store instruction changing *only its
538  // type*. The only metadata it makes sense to drop is metadata which is
539  // invalidated when the pointer type changes. This should essentially
540  // never be the case in LLVM, but we explicitly switch over only known
541  // metadata to be conservatively correct. If you are adding metadata to
542  // LLVM which pertains to stores, you almost certainly want to add it
543  // here.
544  switch (ID) {
545  case LLVMContext::MD_dbg:
546  case LLVMContext::MD_tbaa:
547  case LLVMContext::MD_prof:
548  case LLVMContext::MD_fpmath:
549  case LLVMContext::MD_tbaa_struct:
550  case LLVMContext::MD_alias_scope:
551  case LLVMContext::MD_noalias:
552  case LLVMContext::MD_nontemporal:
553  case LLVMContext::MD_mem_parallel_loop_access:
554  // All of these directly apply.
555  NewStore->setMetadata(ID, N);
556  break;
557 
558  case LLVMContext::MD_invariant_load:
559  case LLVMContext::MD_nonnull:
560  case LLVMContext::MD_range:
561  case LLVMContext::MD_align:
562  case LLVMContext::MD_dereferenceable:
563  case LLVMContext::MD_dereferenceable_or_null:
564  // These don't apply for stores.
565  break;
566  }
567  }
568 
569  return NewStore;
570 }
571 
572 /// Returns true if instruction represent minmax pattern like:
573 /// select ((cmp load V1, load V2), V1, V2).
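///
/// For example (illustrative), the following select of pointers matches:
///   %l1 = load i32, i32* %p1
///   %l2 = load i32, i32* %p2
///   %c  = icmp slt i32 %l1, %l2
///   %m  = select i1 %c, i32* %p1, i32* %p2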
574 static bool isMinMaxWithLoads(Value *V) {
575  assert(V->getType()->isPointerTy() && "Expected pointer type.");
576  // Ignore possible ty* to ixx* bitcast.
577  V = peekThroughBitcast(V);
578  // Check that select is select ((cmp load V1, load V2), V1, V2) - minmax
579  // pattern.
580  CmpInst::Predicate Pred;
581  Instruction *L1;
582  Instruction *L2;
583  Value *LHS;
584  Value *RHS;
585  if (!match(V, m_Select(m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2)),
586  m_Value(LHS), m_Value(RHS))))
587  return false;
588  return (match(L1, m_Load(m_Specific(LHS))) &&
589  match(L2, m_Load(m_Specific(RHS)))) ||
590  (match(L1, m_Load(m_Specific(RHS))) &&
591  match(L2, m_Load(m_Specific(LHS))));
592 }
593 
594 /// Combine loads to match the type of their uses' value after looking
595 /// through intervening bitcasts.
596 ///
597 /// The core idea here is that if the result of a load is used in an operation,
598 /// we should load the type most conducive to that operation. For example, when
599 /// loading an integer and converting that immediately to a pointer, we should
600 /// instead directly load a pointer.
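/// For example (illustrative, assuming 64-bit pointers so the cast is a no-op):
///   %i = load i64, i64* %p
///   %q = inttoptr i64 %i to i8*
/// becomes roughly
///   %p.cast = bitcast i64* %p to i8**
///   %q = load i8*, i8** %p.cast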
601 ///
602 /// However, this routine must never change the width of a load or the number of
603 /// loads as that would introduce a semantic change. This combine is expected to
604 /// be a semantic no-op which just allows loads to more closely model the types
605 /// of their consuming operations.
606 ///
607 /// Currently, we also refuse to change the precise type used for an atomic load
608 /// or a volatile load. This is debatable, and might be reasonable to change
609 /// later. However, it is risky in case some backend or other part of LLVM is
610 /// relying on the exact type loaded to select appropriate atomic operations.
611 static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
612  // FIXME: We could probably with some care handle both volatile and ordered
613  // atomic loads here but it isn't clear that this is important.
614  if (!LI.isUnordered())
615  return nullptr;
616 
617  if (LI.use_empty())
618  return nullptr;
619 
620  // swifterror values can't be bitcasted.
621  if (LI.getPointerOperand()->isSwiftError())
622  return nullptr;
623 
624  Type *Ty = LI.getType();
625  const DataLayout &DL = IC.getDataLayout();
626 
627  // Try to canonicalize loads which are only ever stored to operate over
628  // integers instead of any other type. We only do this when the loaded type
629  // is sized and has a size exactly the same as its store size and the store
630  // size is a legal integer type.
631  // Do not perform canonicalization if minmax pattern is found (to avoid
632  // infinite loop).
633  if (!Ty->isIntegerTy() && Ty->isSized() &&
634  DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
635  DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty) &&
636  !DL.isNonIntegralPointerType(Ty) &&
637  !isMinMaxWithLoads(
638  peekThroughBitcast(LI.getPointerOperand(), /*OneUseOnly=*/true))) {
639  if (all_of(LI.users(), [&LI](User *U) {
640  auto *SI = dyn_cast<StoreInst>(U);
641  return SI && SI->getPointerOperand() != &LI &&
642  !SI->getPointerOperand()->isSwiftError();
643  })) {
644  LoadInst *NewLoad = combineLoadToNewType(
645  IC, LI,
646  Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
647  // Replace all the stores with stores of the newly loaded value.
648  for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
649  auto *SI = cast<StoreInst>(*UI++);
650  IC.Builder.SetInsertPoint(SI);
651  combineStoreToNewValue(IC, *SI, NewLoad);
652  IC.eraseInstFromFunction(*SI);
653  }
654  assert(LI.use_empty() && "Failed to remove all users of the load!");
655  // Return the old load so the combiner can delete it safely.
656  return &LI;
657  }
658  }
659 
660  // Fold away bit casts of the loaded value by loading the desired type.
661  // We can do this for BitCastInsts as well as casts from and to pointer types,
662  // as long as those are noops (i.e., the source or dest type have the same
663  // bitwidth as the target's pointers).
664  if (LI.hasOneUse())
665  if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
666  if (CI->isNoopCast(DL))
667  if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) {
668  LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
669  CI->replaceAllUsesWith(NewLoad);
670  IC.eraseInstFromFunction(*CI);
671  return &LI;
672  }
673 
674  // FIXME: We should also canonicalize loads of vectors when their elements are
675  // cast to other types.
676  return nullptr;
677 }
678 
679 static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
680  // FIXME: We could probably with some care handle both volatile and atomic
681  // stores here but it isn't clear that this is important.
682  if (!LI.isSimple())
683  return nullptr;
684 
685  Type *T = LI.getType();
686  if (!T->isAggregateType())
687  return nullptr;
688 
689  StringRef Name = LI.getName();
690  assert(LI.getAlignment() && "Alignment must be set at this point");
691 
692  if (auto *ST = dyn_cast<StructType>(T)) {
693  // If the struct only has one element, we unpack.
694  auto NumElements = ST->getNumElements();
695  if (NumElements == 1) {
696  LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
697  ".unpack");
698  AAMDNodes AAMD;
699  LI.getAAMetadata(AAMD);
700  NewLoad->setAAMetadata(AAMD);
701  return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
702  UndefValue::get(T), NewLoad, 0, Name));
703  }
704 
705  // We don't want to break loads with padding here as we'd lose
706  // the knowledge that padding exists for the rest of the pipeline.
707  const DataLayout &DL = IC.getDataLayout();
708  auto *SL = DL.getStructLayout(ST);
709  if (SL->hasPadding())
710  return nullptr;
711 
712  auto Align = LI.getAlignment();
713  if (!Align)
714  Align = DL.getABITypeAlignment(ST);
715 
716  auto *Addr = LI.getPointerOperand();
717  auto *IdxType = Type::getInt32Ty(T->getContext());
718  auto *Zero = ConstantInt::get(IdxType, 0);
719 
720  Value *V = UndefValue::get(T);
721  for (unsigned i = 0; i < NumElements; i++) {
722  Value *Indices[2] = {
723  Zero,
724  ConstantInt::get(IdxType, i),
725  };
726  auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
727  Name + ".elt");
728  auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
729  auto *L = IC.Builder.CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack");
730  // Propagate AA metadata. It'll still be valid on the narrowed load.
731  AAMDNodes AAMD;
732  LI.getAAMetadata(AAMD);
733  L->setAAMetadata(AAMD);
734  V = IC.Builder.CreateInsertValue(V, L, i);
735  }
736 
737  V->setName(Name);
738  return IC.replaceInstUsesWith(LI, V);
739  }
740 
741  if (auto *AT = dyn_cast<ArrayType>(T)) {
742  auto *ET = AT->getElementType();
743  auto NumElements = AT->getNumElements();
744  if (NumElements == 1) {
745  LoadInst *NewLoad = combineLoadToNewType(IC, LI, ET, ".unpack");
746  AAMDNodes AAMD;
747  LI.getAAMetadata(AAMD);
748  NewLoad->setAAMetadata(AAMD);
749  return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
750  UndefValue::get(T), NewLoad, 0, Name));
751  }
752 
753  // Bail out if the array is too large. Ideally we would like to optimize
754  // arrays of arbitrary size but this has a terrible impact on compile time.
755  // The threshold here is chosen arbitrarily, maybe needs a little bit of
756  // tuning.
757  if (NumElements > IC.MaxArraySizeForCombine)
758  return nullptr;
759 
760  const DataLayout &DL = IC.getDataLayout();
761  auto EltSize = DL.getTypeAllocSize(ET);
762  auto Align = LI.getAlignment();
763  if (!Align)
764  Align = DL.getABITypeAlignment(T);
765 
766  auto *Addr = LI.getPointerOperand();
767  auto *IdxType = Type::getInt64Ty(T->getContext());
768  auto *Zero = ConstantInt::get(IdxType, 0);
769 
770  Value *V = UndefValue::get(T);
771  uint64_t Offset = 0;
772  for (uint64_t i = 0; i < NumElements; i++) {
773  Value *Indices[2] = {
774  Zero,
775  ConstantInt::get(IdxType, i),
776  };
777  auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
778  Name + ".elt");
779  auto *L = IC.Builder.CreateAlignedLoad(Ptr, MinAlign(Align, Offset),
780  Name + ".unpack");
781  AAMDNodes AAMD;
782  LI.getAAMetadata(AAMD);
783  L->setAAMetadata(AAMD);
784  V = IC.Builder.CreateInsertValue(V, L, i);
785  Offset += EltSize;
786  }
787 
788  V->setName(Name);
789  return IC.replaceInstUsesWith(LI, V);
790  }
791 
792  return nullptr;
793 }
794 
795 // If we can determine that all possible objects pointed to by the provided
796 // pointer value are, not only dereferenceable, but also definitively less than
797 // or equal to the provided maximum size, then return true (constant global
798 // values and allocas fall into this category). Otherwise, return false.
799 //
800 // FIXME: This should probably live in ValueTracking (or similar).
801 static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
802  const DataLayout &DL) {
803  SmallPtrSet<Value *, 4> Visited;
804  SmallVector<Value *, 4> Worklist(1, V);
805 
806  do {
807  Value *P = Worklist.pop_back_val();
808  P = P->stripPointerCasts();
809 
810  if (!Visited.insert(P).second)
811  continue;
812 
813  if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
814  Worklist.push_back(SI->getTrueValue());
815  Worklist.push_back(SI->getFalseValue());
816  continue;
817  }
818 
819  if (PHINode *PN = dyn_cast<PHINode>(P)) {
820  for (Value *IncValue : PN->incoming_values())
821  Worklist.push_back(IncValue);
822  continue;
823  }
824 
825  if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
826  if (GA->isInterposable())
827  return false;
828  Worklist.push_back(GA->getAliasee());
829  continue;
830  }
831 
832  // If we know how big this object is, and it is less than MaxSize, continue
833  // searching. Otherwise, return false.
834  if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
835  if (!AI->getAllocatedType()->isSized())
836  return false;
837 
838  ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
839  if (!CS)
840  return false;
841 
842  uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
843  // Make sure that, even if the multiplication below would wrap as a
844  // uint64_t, we still do the right thing.
845  if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
846  return false;
847  continue;
848  }
849 
850  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
851  if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
852  return false;
853 
854  uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
855  if (InitSize > MaxSize)
856  return false;
857  continue;
858  }
859 
860  return false;
861  } while (!Worklist.empty());
862 
863  return true;
864 }
865 
866 // If we're indexing into an object of a known size, and the outer index is
867 // not a constant, but having any value but zero would lead to undefined
868 // behavior, replace it with zero.
869 //
870 // For example, if we have:
871 // @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
872 // ...
873 // %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
874 // ... = load i32* %arrayidx, align 4
875 // Then we know that we can replace %x in the GEP with i64 0.
876 //
877 // FIXME: We could fold any GEP index to zero that would cause UB if it were
878 // not zero. Currently, we only handle the first such index. Also, we could
879 // also search through non-zero constant indices if we kept track of the
880 // offsets those indices implied.
881 static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
882                                      Instruction *MemI, unsigned &Idx) {
883  if (GEPI->getNumOperands() < 2)
884  return false;
885 
886  // Find the first non-zero index of a GEP. If all indices are zero, return
887  // one past the last index.
888  auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
889  unsigned I = 1;
890  for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
891  Value *V = GEPI->getOperand(I);
892  if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
893  if (CI->isZero())
894  continue;
895 
896  break;
897  }
898 
899  return I;
900  };
901 
902  // Skip through initial 'zero' indices, and find the corresponding pointer
903  // type. See if the next index is not a constant.
904  Idx = FirstNZIdx(GEPI);
905  if (Idx == GEPI->getNumOperands())
906  return false;
907  if (isa<Constant>(GEPI->getOperand(Idx)))
908  return false;
909 
910  SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
911  Type *AllocTy =
912      GetElementPtrInst::getIndexedType(GEPI->getSourceElementType(), Ops);
913  if (!AllocTy || !AllocTy->isSized())
914  return false;
915  const DataLayout &DL = IC.getDataLayout();
916  uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
917 
918  // If there are more indices after the one we might replace with a zero, make
919  // sure they're all non-negative. If any of them are negative, the overall
920  // address being computed might be before the base address determined by the
921  // first non-zero index.
922  auto IsAllNonNegative = [&]() {
923  for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
924  KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
925  if (Known.isNonNegative())
926  continue;
927  return false;
928  }
929 
930  return true;
931  };
932 
933  // FIXME: If the GEP is not inbounds, and there are extra indices after the
934  // one we'll replace, those could cause the address computation to wrap
935  // (rendering the IsAllNonNegative() check below insufficient). We can do
936  // better, ignoring zero indices (and other indices we can prove small
937  // enough not to wrap).
938  if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
939  return false;
940 
941  // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
942  // also known to be dereferenceable.
943  return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
944  IsAllNonNegative();
945 }
946 
947 // If we're indexing into an object with a variable index for the memory
948 // access, but the object has only one element, we can assume that the index
949 // will always be zero. If we replace the GEP, return it.
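//
// For example (illustrative):
//   %g = getelementptr inbounds [1 x i32], [1 x i32]* %p, i64 0, i64 %i
//   %v = load i32, i32* %g
// Any value of %i other than 0 would be out of bounds, so %i is replaced by 0
// in a cloned GEP and the memory access is pointed at the clone.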
950 template <typename T>
951 static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
952                                           T &MemI) {
953  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
954  unsigned Idx;
955  if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
956  Instruction *NewGEPI = GEPI->clone();
957  NewGEPI->setOperand(Idx,
958  ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
959  NewGEPI->insertBefore(GEPI);
960  MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
961  return NewGEPI;
962  }
963  }
964 
965  return nullptr;
966 }
967 
968 static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
969  if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
970  return false;
971 
972  auto *Ptr = SI.getPointerOperand();
973  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
974  Ptr = GEPI->getOperand(0);
975  return (isa<ConstantPointerNull>(Ptr) &&
976  !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
977 }
978 
979 static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
980  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
981  const Value *GEPI0 = GEPI->getOperand(0);
982  if (isa<ConstantPointerNull>(GEPI0) &&
983  !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
984  return true;
985  }
986  if (isa<UndefValue>(Op) ||
987  (isa<ConstantPointerNull>(Op) &&
988  !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace())))
989  return true;
990  return false;
991 }
992 
993 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
994  Value *Op = LI.getOperand(0);
995 
996  // Try to canonicalize the loaded type.
997  if (Instruction *Res = combineLoadToOperationType(*this, LI))
998  return Res;
999 
1000  // Attempt to improve the alignment.
1001  unsigned KnownAlign = getOrEnforceKnownAlignment(
1002  Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, &AC, &DT);
1003  unsigned LoadAlign = LI.getAlignment();
1004  unsigned EffectiveLoadAlign =
1005  LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
1006 
1007  if (KnownAlign > EffectiveLoadAlign)
1008  LI.setAlignment(KnownAlign);
1009  else if (LoadAlign == 0)
1010  LI.setAlignment(EffectiveLoadAlign);
1011 
1012  // Replace GEP indices if possible.
1013  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
1014  Worklist.Add(NewGEPI);
1015  return &LI;
1016  }
1017 
1018  if (Instruction *Res = unpackLoadToAggregate(*this, LI))
1019  return Res;
1020 
1021  // Do really simple store-to-load forwarding and load CSE, to catch cases
1022  // where there are several consecutive memory accesses to the same location,
1023  // separated by a few arithmetic operations.
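  // For example (illustrative):
  //   store i32 %x, i32* %p
  //   %t = add i32 %a, %b
  //   %y = load i32, i32* %p    ; can be replaced by %x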
1024  BasicBlock::iterator BBI(LI);
1025  bool IsLoadCSE = false;
1026  if (Value *AvailableVal = FindAvailableLoadedValue(
1027  &LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) {
1028  if (IsLoadCSE)
1029  combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
1030 
1031  return replaceInstUsesWith(
1032  LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
1033  LI.getName() + ".cast"));
1034  }
1035 
1036  // None of the following transforms are legal for volatile/ordered atomic
1037  // loads. Most of them do apply for unordered atomics.
1038  if (!LI.isUnordered()) return nullptr;
1039 
1040  // load(gep null, ...) -> unreachable
1041  // load null/undef -> unreachable
1042  // TODO: Consider a target hook for valid address spaces for this xforms.
1043  if (canSimplifyNullLoadOrGEP(LI, Op)) {
1044  // Insert a new store to null instruction before the load to indicate
1045  // that this code is not reachable. We do this instead of inserting
1046  // an unreachable instruction directly because we cannot modify the
1047  // CFG.
1048  StoreInst *SI = new StoreInst(UndefValue::get(LI.getType()),
1049                                Constant::getNullValue(Op->getType()), &LI);
1050  SI->setDebugLoc(LI.getDebugLoc());
1051  return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
1052  }
1053 
1054  if (Op->hasOneUse()) {
1055  // Change select and PHI nodes to select values instead of addresses: this
1056  // helps alias analysis out a lot, allows many other simplifications, and
1057  // exposes redundancy in the code.
1058  //
1059  // Note that we cannot do the transformation unless we know that the
1060  // introduced loads cannot trap! Something like this is valid as long as
1061  // the condition is always false: load (select bool %C, int* null, int* %G),
1062  // but it would not be valid if we transformed it to load from null
1063  // unconditionally.
1064  //
1065  if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
1066  // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
1067  unsigned Align = LI.getAlignment();
1068  if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, DL, SI) &&
1069  isSafeToLoadUnconditionally(SI->getOperand(2), Align, DL, SI)) {
1070  LoadInst *V1 = Builder.CreateLoad(SI->getOperand(1),
1071  SI->getOperand(1)->getName()+".val");
1072  LoadInst *V2 = Builder.CreateLoad(SI->getOperand(2),
1073  SI->getOperand(2)->getName()+".val");
1074  assert(LI.isUnordered() && "implied by above");
1075  V1->setAlignment(Align);
1076  V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1077  V2->setAlignment(Align);
1078  V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1079  return SelectInst::Create(SI->getCondition(), V1, V2);
1080  }
1081 
1082  // load (select (cond, null, P)) -> load P
1083  if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
1084  !NullPointerIsDefined(SI->getFunction(),
1085  LI.getPointerAddressSpace())) {
1086  LI.setOperand(0, SI->getOperand(2));
1087  return &LI;
1088  }
1089 
1090  // load (select (cond, P, null)) -> load P
1091  if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
1092  !NullPointerIsDefined(SI->getFunction(),
1093  LI.getPointerAddressSpace())) {
1094  LI.setOperand(0, SI->getOperand(1));
1095  return &LI;
1096  }
1097  }
1098  }
1099  return nullptr;
1100 }
1101 
1102 /// Look for extractelement/insertvalue sequence that acts like a bitcast.
1103 ///
1104 /// \returns underlying value that was "cast", or nullptr otherwise.
1105 ///
1106 /// For example, if we have:
1107 ///
1108 /// %E0 = extractelement <2 x double> %U, i32 0
1109 /// %V0 = insertvalue [2 x double] undef, double %E0, 0
1110 /// %E1 = extractelement <2 x double> %U, i32 1
1111 /// %V1 = insertvalue [2 x double] %V0, double %E1, 1
1112 ///
1113 /// and the layout of a <2 x double> is isomorphic to a [2 x double],
1114 /// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1115 /// Note that %U may contain non-undef values where %V1 has undef.
1116 static Value *likeBitCastFromVector(InstCombiner &IC, Value *V) {
1117  Value *U = nullptr;
1118  while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1119  auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1120  if (!E)
1121  return nullptr;
1122  auto *W = E->getVectorOperand();
1123  if (!U)
1124  U = W;
1125  else if (U != W)
1126  return nullptr;
1127  auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1128  if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1129  return nullptr;
1130  V = IV->getAggregateOperand();
1131  }
1132  if (!isa<UndefValue>(V) ||!U)
1133  return nullptr;
1134 
1135  auto *UT = cast<VectorType>(U->getType());
1136  auto *VT = V->getType();
1137  // Check that types UT and VT are bitwise isomorphic.
1138  const auto &DL = IC.getDataLayout();
1139  if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1140  return nullptr;
1141  }
1142  if (auto *AT = dyn_cast<ArrayType>(VT)) {
1143  if (AT->getNumElements() != UT->getNumElements())
1144  return nullptr;
1145  } else {
1146  auto *ST = cast<StructType>(VT);
1147  if (ST->getNumElements() != UT->getNumElements())
1148  return nullptr;
1149  for (const auto *EltT : ST->elements()) {
1150  if (EltT != UT->getElementType())
1151  return nullptr;
1152  }
1153  }
1154  return U;
1155 }
1156 
1157 /// Combine stores to match the type of value being stored.
1158 ///
1159 /// The core idea here is that the memory does not have any intrinsic type and
1160 /// where we can we should match the type of a store to the type of value being
1161 /// stored.
1162 ///
1163 /// However, this routine must never change the width of a store or the number of
1164 /// stores as that would introduce a semantic change. This combine is expected to
1165 /// be a semantic no-op which just allows stores to more closely model the types
1166 /// of their incoming values.
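/// For example (illustrative):
///   %b = bitcast float %v to i32
///   store i32 %b, i32* %p
/// becomes roughly
///   %p.cast = bitcast i32* %p to float*
///   store float %v, float* %p.cast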
1167 ///
1168 /// Currently, we also refuse to change the precise type used for an atomic or
1169 /// volatile store. This is debatable, and might be reasonable to change later.
1170 /// However, it is risky in case some backend or other part of LLVM is relying
1171 /// on the exact type stored to select appropriate atomic operations.
1172 ///
1173 /// \returns true if the store was successfully combined away. This indicates
1174 /// the caller must erase the store instruction. We have to let the caller erase
1175 /// the store instruction as otherwise there is no way to signal whether it was
1176 /// combined or not: IC.EraseInstFromFunction returns a null pointer.
1177 static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
1178  // FIXME: We could probably with some care handle both volatile and ordered
1179  // atomic stores here but it isn't clear that this is important.
1180  if (!SI.isUnordered())
1181  return false;
1182 
1183  // swifterror values can't be bitcasted.
1184  if (SI.getPointerOperand()->isSwiftError())
1185  return false;
1186 
1187  Value *V = SI.getValueOperand();
1188 
1189  // Fold away bit casts of the stored value by storing the original type.
1190  if (auto *BC = dyn_cast<BitCastInst>(V)) {
1191  V = BC->getOperand(0);
1192  if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
1193  combineStoreToNewValue(IC, SI, V);
1194  return true;
1195  }
1196  }
1197 
1198  if (Value *U = likeBitCastFromVector(IC, V))
1199  if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1200  combineStoreToNewValue(IC, SI, U);
1201  return true;
1202  }
1203 
1204  // FIXME: We should also canonicalize stores of vectors when their elements
1205  // are cast to other types.
1206  return false;
1207 }
1208 
1209 static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
1210  // FIXME: We could probably with some care handle both volatile and atomic
1211  // stores here but it isn't clear that this is important.
1212  if (!SI.isSimple())
1213  return false;
1214 
1215  Value *V = SI.getValueOperand();
1216  Type *T = V->getType();
1217 
1218  if (!T->isAggregateType())
1219  return false;
1220 
1221  if (auto *ST = dyn_cast<StructType>(T)) {
1222  // If the struct only has one element, we unpack.
1223  unsigned Count = ST->getNumElements();
1224  if (Count == 1) {
1225  V = IC.Builder.CreateExtractValue(V, 0);
1226  combineStoreToNewValue(IC, SI, V);
1227  return true;
1228  }
1229 
1230  // We don't want to break stores with padding here as we'd lose
1231  // the knowledge that padding exists for the rest of the pipeline.
1232  const DataLayout &DL = IC.getDataLayout();
1233  auto *SL = DL.getStructLayout(ST);
1234  if (SL->hasPadding())
1235  return false;
1236 
1237  auto Align = SI.getAlignment();
1238  if (!Align)
1239  Align = DL.getABITypeAlignment(ST);
1240 
1241  SmallString<16> EltName = V->getName();
1242  EltName += ".elt";
1243  auto *Addr = SI.getPointerOperand();
1244  SmallString<16> AddrName = Addr->getName();
1245  AddrName += ".repack";
1246 
1247  auto *IdxType = Type::getInt32Ty(ST->getContext());
1248  auto *Zero = ConstantInt::get(IdxType, 0);
1249  for (unsigned i = 0; i < Count; i++) {
1250  Value *Indices[2] = {
1251  Zero,
1252  ConstantInt::get(IdxType, i),
1253  };
1254  auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
1255  AddrName);
1256  auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1257  auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
1258  llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1259  AAMDNodes AAMD;
1260  SI.getAAMetadata(AAMD);
1261  NS->setAAMetadata(AAMD);
1262  }
1263 
1264  return true;
1265  }
1266 
1267  if (auto *AT = dyn_cast<ArrayType>(T)) {
1268  // If the array only has one element, we unpack.
1269  auto NumElements = AT->getNumElements();
1270  if (NumElements == 1) {
1271  V = IC.Builder.CreateExtractValue(V, 0);
1272  combineStoreToNewValue(IC, SI, V);
1273  return true;
1274  }
1275 
1276  // Bail out if the array is too large. Ideally we would like to optimize
1277  // arrays of arbitrary size but this has a terrible impact on compile time.
1278  // The threshold here is chosen arbitrarily, maybe needs a little bit of
1279  // tuning.
1280  if (NumElements > IC.MaxArraySizeForCombine)
1281  return false;
1282 
1283  const DataLayout &DL = IC.getDataLayout();
1284  auto EltSize = DL.getTypeAllocSize(AT->getElementType());
1285  auto Align = SI.getAlignment();
1286  if (!Align)
1287  Align = DL.getABITypeAlignment(T);
1288 
1289  SmallString<16> EltName = V->getName();
1290  EltName += ".elt";
1291  auto *Addr = SI.getPointerOperand();
1292  SmallString<16> AddrName = Addr->getName();
1293  AddrName += ".repack";
1294 
1295  auto *IdxType = Type::getInt64Ty(T->getContext());
1296  auto *Zero = ConstantInt::get(IdxType, 0);
1297 
1298  uint64_t Offset = 0;
1299  for (uint64_t i = 0; i < NumElements; i++) {
1300  Value *Indices[2] = {
1301  Zero,
1302  ConstantInt::get(IdxType, i),
1303  };
1304  auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
1305  AddrName);
1306  auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1307  auto EltAlign = MinAlign(Align, Offset);
1308  Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1309  AAMDNodes AAMD;
1310  SI.getAAMetadata(AAMD);
1311  NS->setAAMetadata(AAMD);
1312  Offset += EltSize;
1313  }
1314 
1315  return true;
1316  }
1317 
1318  return false;
1319 }
1320 
1321 /// equivalentAddressValues - Test if A and B will obviously have the same
1322 /// value. This includes recognizing that %t0 and %t1 will have the same
1323 /// value in code like this:
1324 /// %t0 = getelementptr \@a, 0, 3
1325 /// store i32 0, i32* %t0
1326 /// %t1 = getelementptr \@a, 0, 3
1327 /// %t2 = load i32* %t1
1328 ///
1329 static bool equivalentAddressValues(Value *A, Value *B) {
1330  // Test if the values are trivially equivalent.
1331  if (A == B) return true;
1332 
1333  // Test if the values come from identical arithmetic instructions.
1334  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1335  // it's only used to compare two uses within the same basic block, which
1336  // means that they'll always either have the same value or one of them
1337  // will have an undefined value.
1338  if (isa<BinaryOperator>(A) ||
1339  isa<CastInst>(A) ||
1340  isa<PHINode>(A) ||
1341  isa<GetElementPtrInst>(A))
1342  if (Instruction *BI = dyn_cast<Instruction>(B))
1343  if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1344  return true;
1345 
1346  // Otherwise they may not be equivalent.
1347  return false;
1348 }
1349 
1350 /// Converts store (bitcast (load (bitcast (select ...)))) to
1351 /// store (load (select ...)), where select is minmax:
1352 /// select ((cmp load V1, load V2), V1, V2).
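///
/// For example (illustrative), with float* values %p1, %p2 and destination %q:
///   %v1  = load float, float* %p1
///   %v2  = load float, float* %p2
///   %c   = fcmp olt float %v1, %v2
///   %min = select i1 %c, float* %p1, float* %p2
///   %mi  = bitcast float* %min to i32*
///   %l   = load i32, i32* %mi
///   %qi  = bitcast float* %q to i32*
///   store i32 %l, i32* %qi
/// is rewritten to load and store float directly through %min and %q.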
1353 static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
1354                                                 StoreInst &SI) {
1355  // bitcast?
1356  if (!match(SI.getPointerOperand(), m_BitCast(m_Value())))
1357  return false;
1358  // load? integer?
1359  Value *LoadAddr;
1360  if (!match(SI.getValueOperand(), m_Load(m_BitCast(m_Value(LoadAddr)))))
1361  return false;
1362  auto *LI = cast<LoadInst>(SI.getValueOperand());
1363  if (!LI->getType()->isIntegerTy())
1364  return false;
1365  if (!isMinMaxWithLoads(LoadAddr))
1366  return false;
1367 
1368  if (!all_of(LI->users(), [LI, LoadAddr](User *U) {
1369  auto *SI = dyn_cast<StoreInst>(U);
1370  return SI && SI->getPointerOperand() != LI &&
1371  peekThroughBitcast(SI->getPointerOperand()) != LoadAddr &&
1372  !SI->getPointerOperand()->isSwiftError();
1373  }))
1374  return false;
1375 
1376  IC.Builder.SetInsertPoint(LI);
1377  LoadInst *NewLI = combineLoadToNewType(
1378  IC, *LI, LoadAddr->getType()->getPointerElementType());
1379  // Replace all the stores with stores of the newly loaded value.
1380  for (auto *UI : LI->users()) {
1381  auto *USI = cast<StoreInst>(UI);
1382  IC.Builder.SetInsertPoint(USI);
1383  combineStoreToNewValue(IC, *USI, NewLI);
1384  }
1385  IC.replaceInstUsesWith(*LI, UndefValue::get(LI->getType()));
1386  IC.eraseInstFromFunction(*LI);
1387  return true;
1388 }
1389 
1390 Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
1391  Value *Val = SI.getOperand(0);
1392  Value *Ptr = SI.getOperand(1);
1393 
1394  // Try to canonicalize the stored type.
1395  if (combineStoreToValueType(*this, SI))
1396  return eraseInstFromFunction(SI);
1397 
1398  // Attempt to improve the alignment.
1399  unsigned KnownAlign = getOrEnforceKnownAlignment(
1400  Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT);
1401  unsigned StoreAlign = SI.getAlignment();
1402  unsigned EffectiveStoreAlign =
1403  StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
1404 
1405  if (KnownAlign > EffectiveStoreAlign)
1406  SI.setAlignment(KnownAlign);
1407  else if (StoreAlign == 0)
1408  SI.setAlignment(EffectiveStoreAlign);
1409 
1410  // Try to canonicalize the stored type.
1411  if (unpackStoreToAggregate(*this, SI))
1412  return eraseInstFromFunction(SI);
1413 
1414  if (removeBitcastsFromLoadStoreOnMinMax(*this, SI))
1415  return eraseInstFromFunction(SI);
1416 
1417  // Replace GEP indices if possible.
1418  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
1419  Worklist.Add(NewGEPI);
1420  return &SI;
1421  }
1422 
1423  // Don't hack volatile/ordered stores.
1424  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1425  if (!SI.isUnordered()) return nullptr;
1426 
1427  // If the RHS is an alloca with a single use, zapify the store, making the
1428  // alloca dead.
1429  if (Ptr->hasOneUse()) {
1430  if (isa<AllocaInst>(Ptr))
1431  return eraseInstFromFunction(SI);
1432  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1433  if (isa<AllocaInst>(GEP->getOperand(0))) {
1434  if (GEP->getOperand(0)->hasOneUse())
1435  return eraseInstFromFunction(SI);
1436  }
1437  }
1438  }
1439 
1440  // Do really simple DSE, to catch cases where there are several consecutive
1441  // stores to the same location, separated by a few arithmetic operations. This
1442  // situation often occurs with bitfield accesses.
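  // For example (illustrative):
  //   store i32 %x, i32* %p
  //   %t = or i32 %a, %b          ; no intervening memory access
  //   store i32 %y, i32* %p       ; makes the first store dead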
1443  BasicBlock::iterator BBI(SI);
1444  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
1445  --ScanInsts) {
1446  --BBI;
1447  // Don't count debug info directives, lest they affect codegen,
1448  // and we skip pointer-to-pointer bitcasts, which are NOPs.
1449  if (isa<DbgInfoIntrinsic>(BBI) ||
1450  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1451  ScanInsts++;
1452  continue;
1453  }
1454 
1455  if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1456  // Prev store isn't volatile, and stores to the same location?
1457  if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
1458  SI.getOperand(1))) {
1459  ++NumDeadStore;
1460  ++BBI;
1461  eraseInstFromFunction(*PrevSI);
1462  continue;
1463  }
1464  break;
1465  }
1466 
1467  // If this is a load, we have to stop. However, if the loaded value is from
1468  // the pointer we're loading and is producing the pointer we're storing,
1469  // then *this* store is dead (X = load P; store X -> P).
1470  if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1471  if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1472  assert(SI.isUnordered() && "can't eliminate ordering operation");
1473  return eraseInstFromFunction(SI);
1474  }
1475 
1476  // Otherwise, this is a load from some other location. Stores before it
1477  // may not be dead.
1478  break;
1479  }
1480 
1481  // Don't skip over loads, throws or things that can modify memory.
1482  if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
1483  break;
1484  }
1485 
1486  // store X, null -> turns into 'unreachable' in SimplifyCFG
1487  // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
1488  if (canSimplifyNullStoreOrGEP(SI)) {
1489  if (!isa<UndefValue>(Val)) {
1490  SI.setOperand(0, UndefValue::get(Val->getType()));
1491  if (Instruction *U = dyn_cast<Instruction>(Val))
1492  Worklist.Add(U); // Dropped a use.
1493  }
1494  return nullptr; // Do not modify these!
1495  }
1496 
1497  // store undef, Ptr -> noop
1498  if (isa<UndefValue>(Val))
1499  return eraseInstFromFunction(SI);
1500 
1501  // If this store is the last instruction in the basic block (possibly
1502  // excepting debug info instructions), and if the block ends with an
1503  // unconditional branch, try to move it to the successor block.
1504  BBI = SI.getIterator();
1505  do {
1506  ++BBI;
1507  } while (isa<DbgInfoIntrinsic>(BBI) ||
1508  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
1509  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
1510  if (BI->isUnconditional())
1511  if (SimplifyStoreAtEndOfBlock(SI))
1512  return nullptr; // xform done!
1513 
1514  return nullptr;
1515 }
1516 
1517 /// SimplifyStoreAtEndOfBlock - Turn things like:
1518 /// if () { *P = v1; } else { *P = v2 }
1519 /// into a phi node with a store in the successor.
1520 ///
1521 /// Simplify things like:
1522 /// *P = v1; if () { *P = v2; }
1523 /// into a phi node with a store in the successor.
1524 ///
1525 bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
1526  assert(SI.isUnordered() &&
1527  "this code has not been audited for volatile or ordered store case");
1528 
1529  BasicBlock *StoreBB = SI.getParent();
1530 
1531  // Check to see if the successor block has exactly two incoming edges. If
1532  // so, see if the other predecessor contains a store to the same location.
1533  // If so, insert a PHI node (if needed) and move the stores down.
1534  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1535 
1536  // Determine whether Dest has exactly two predecessors and, if so, compute
1537  // the other predecessor.
1538  pred_iterator PI = pred_begin(DestBB);
1539  BasicBlock *P = *PI;
1540  BasicBlock *OtherBB = nullptr;
1541 
1542  if (P != StoreBB)
1543  OtherBB = P;
1544 
1545  if (++PI == pred_end(DestBB))
1546  return false;
1547 
1548  P = *PI;
1549  if (P != StoreBB) {
1550  if (OtherBB)
1551  return false;
1552  OtherBB = P;
1553  }
1554  if (++PI != pred_end(DestBB))
1555  return false;
1556 
1557  // Bail out if all the relevant blocks aren't distinct (this can happen,
1558  // for example, if SI is in an infinite loop)
1559  if (StoreBB == DestBB || OtherBB == DestBB)
1560  return false;
1561 
1562  // Verify that the other block ends in a branch and is not otherwise empty.
1563  BasicBlock::iterator BBI(OtherBB->getTerminator());
1564  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1565  if (!OtherBr || BBI == OtherBB->begin())
1566  return false;
1567 
1568  // If the other block ends in an unconditional branch, check for the 'if then
1569  // else' case: there is an instruction before the branch.
1570  StoreInst *OtherStore = nullptr;
1571  if (OtherBr->isUnconditional()) {
1572  --BBI;
1573  // Skip over debugging info.
1574  while (isa<DbgInfoIntrinsic>(BBI) ||
1575  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1576  if (BBI==OtherBB->begin())
1577  return false;
1578  --BBI;
1579  }
1580  // If this isn't a store, isn't a store to the same location, or is not the
1581  // right kind of store, bail out.
1582  OtherStore = dyn_cast<StoreInst>(BBI);
1583  if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
1584  !SI.isSameOperationAs(OtherStore))
1585  return false;
1586  } else {
1587  // Otherwise, the other block ended with a conditional branch. If one of the
1588  // destinations is StoreBB, then we have the if/then case.
1589  if (OtherBr->getSuccessor(0) != StoreBB &&
1590  OtherBr->getSuccessor(1) != StoreBB)
1591  return false;
1592 
1593  // Okay, we know that OtherBr now goes to DestBB and StoreBB, so this is an
1594  // if/then triangle. See if there is a store to the same pointer as SI that
1595  // lives in OtherBB.
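    // Sketch of the triangle (hypothetical IR):
    //   OtherBB:
    //     store i32 %v1, i32* %p
    //     br i1 %cond, label %StoreBB, label %DestBB
    //   StoreBB:
    //     store i32 %v2, i32* %p   ; this is SI
    //     br label %DestBB
    // If the scans below succeed, both stores are merged into one in DestBB.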
1596  for (;; --BBI) {
1597  // Check to see if we find the matching store.
1598  if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
1599  if (OtherStore->getOperand(1) != SI.getOperand(1) ||
1600  !SI.isSameOperationAs(OtherStore))
1601  return false;
1602  break;
1603  }
1604  // If we find something that may be using or overwriting the stored
1605  // value, or if we run out of instructions, we can't do the xform.
1606  if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1607  BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1608  return false;
1609  }
1610 
1611  // In order to eliminate the store in OtherBB, we have to
1612  // make sure nothing reads or overwrites the stored value in
1613  // StoreBB.
1614  for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1615  // FIXME: This should really be AA driven.
1616  if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1617  return false;
1618  }
1619  }
1620 
1621  // Insert a PHI node now if we need it.
1622  Value *MergedVal = OtherStore->getOperand(0);
1623  if (MergedVal != SI.getOperand(0)) {
1624  PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
1625  PN->addIncoming(SI.getOperand(0), SI.getParent());
1626  PN->addIncoming(OtherStore->getOperand(0), OtherBB);
1627  MergedVal = InsertNewInstBefore(PN, DestBB->front());
1628  }
1629 
1630  // Advance to a place where it is safe to insert the new store and
1631  // insert it.
1632  BBI = DestBB->getFirstInsertionPt();
1633  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
1634  SI.isVolatile(),
1635  SI.getAlignment(),
1636  SI.getOrdering(),
1637  SI.getSyncScopeID());
1638  InsertNewInstBefore(NewSI, *BBI);
1639  // The debug locations of the original instructions might differ; merge them.
1640  NewSI->applyMergedLocation(SI.getDebugLoc(), OtherStore->getDebugLoc());
1641 
1642  // If the two stores had AA tags, merge them.
1643  AAMDNodes AATags;
1644  SI.getAAMetadata(AATags);
1645  if (AATags) {
1646  OtherStore->getAAMetadata(AATags, /* Merge = */ true);
1647  NewSI->setAAMetadata(AATags);
1648  }
1649 
1650  // Nuke the old stores.
1651  eraseInstFromFunction(SI);
1652  eraseInstFromFunction(*OtherStore);
1653  return true;
1654 }